//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help analyze properties that chains of
// computations have.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Analysis/WithCache.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
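/// For example (illustrative, not from the original source): an i32 scalar
/// yields 32, a <4 x i16> vector yields 16, and a pointer type falls back to
/// the DataLayout's pointer size for its address space.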
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static const Instruction *safeCxtI(const Value *V1, const Value *V2,
                                   const Instruction *CxtI) {
  // If we've been provided with a context instruction, then use that (provided
  // it has been inserted).
  if (CxtI && CxtI->getParent())
    return CxtI;

  // If the value is really an already-inserted instruction, then use that.
  CxtI = dyn_cast<Instruction>(V1);
  if (CxtI && CxtI->getParent())
    return CxtI;

  CxtI = dyn_cast<Instruction>(V2);
  if (CxtI && CxtI->getParent())
    return CxtI;

  return nullptr;
}

static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
                                   const APInt &DemandedElts,
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
  if (isa<ScalableVectorType>(Shuf->getType())) {
    assert(DemandedElts == APInt(1, 1));
    DemandedLHS = DemandedRHS = DemandedElts;
    return true;
  }

  int NumElts =
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
                                      DemandedElts, DemandedLHS, DemandedRHS);
}

static void computeKnownBits(const Value *V, const APInt &DemandedElts,
                             KnownBits &Known, unsigned Depth,
                             const SimplifyQuery &Q);

void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                            const SimplifyQuery &Q) {
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ?
      APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
}

void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                            const DataLayout &DL, unsigned Depth,
                            AssumptionCache *AC, const Instruction *CxtI,
                            const DominatorTree *DT, bool UseInstrInfo) {
  computeKnownBits(
      V, Known, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
                                 unsigned Depth, AssumptionCache *AC,
                                 const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
                                 const DataLayout &DL, unsigned Depth,
                                 AssumptionCache *AC, const Instruction *CxtI,
                                 const DominatorTree *DT, bool UseInstrInfo) {
  return computeKnownBits(
      V, DemandedElts, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
                                            const SimplifyQuery &SQ) {
  // Look for an inverted mask: (X & ~M) op (Y & M).
  {
    Value *M;
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
        match(RHS, m_c_And(m_Specific(M), m_Value())) &&
        isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  // X op (Y & ~X)
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
  // for constant Y.
  Value *Y;
  if (match(RHS,
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
      isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Peek through extends to find a 'not' of the other side:
  // (ext Y) op ext(~Y)
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
      match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
      isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
    return true;

  // Look for: (A & B) op ~(A | B)
  {
    Value *A, *B;
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
        match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
        isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
        isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
      return true;
  }

  return false;
}

bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                               const WithCache<const Value *> &RHSCache,
                               const SimplifyQuery &SQ) {
  const Value *LHS = LHSCache.getValue();
  const Value *RHS = RHSCache.getValue();

  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  assert(LHS->getType()->isIntOrIntVectorTy() &&
         "LHS and RHS should be integers");

  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
      haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
    return true;

  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
                                        RHSCache.getKnownBits(SQ));
}

bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
  return !I->user_empty() && all_of(I->users(), [](const User *U) {
    ICmpInst::Predicate P;
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
  });
}

static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
                                   const SimplifyQuery &Q);

bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                                  bool OrZero, unsigned Depth,
                                  AssumptionCache *AC, const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownToBeAPowerOfTwo(
      V, OrZero, Depth,
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
                           unsigned Depth, const SimplifyQuery &Q);

static bool isKnownNonZero(const Value *V, unsigned Depth,
                           const SimplifyQuery &Q);

bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
                          AssumptionCache *AC, const Instruction *CxtI,
                          const DominatorTree *DT, bool UseInstrInfo) {
  return ::isKnownNonZero(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
                              unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNonNegative();
}

bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  if (auto *CI = dyn_cast<ConstantInt>(V))
    return CI->getValue().isStrictlyPositive();

  // TODO: We are doing two recursive queries here. We should factor this
  // such that only a single query is needed.
  return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ);
}

bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
                           unsigned Depth) {
  return computeKnownBits(V, Depth, SQ).isNegative();
}

static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
                            const SimplifyQuery &Q);

bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
                           const DataLayout &DL, AssumptionCache *AC,
                           const Instruction *CxtI, const DominatorTree *DT,
                           bool UseInstrInfo) {
  return ::isKnownNonEqual(
      V1, V2, 0,
      SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
}

bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
                             const SimplifyQuery &SQ, unsigned Depth) {
  KnownBits Known(Mask.getBitWidth());
  computeKnownBits(V, Known, Depth, SQ);
  return Mask.isSubsetOf(Known.Zero);
}

static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
                                   unsigned Depth, const SimplifyQuery &Q);

static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
                                   const SimplifyQuery &Q) {
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
  APInt DemandedElts =
      FVTy ?
      APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}

unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                  unsigned Depth, AssumptionCache *AC,
                                  const Instruction *CxtI,
                                  const DominatorTree *DT, bool UseInstrInfo) {
  return ::ComputeNumSignBits(
      V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
}

unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
                                         unsigned Depth, AssumptionCache *AC,
                                         const Instruction *CxtI,
                                         const DominatorTree *DT) {
  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
}

static void computeKnownBitsAddSub(bool Add, const Value *Op0,
                                   const Value *Op1, bool NSW,
                                   const APInt &DemandedElts,
                                   KnownBits &KnownOut, KnownBits &Known2,
                                   unsigned Depth, const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);

  // If one operand is unknown and we have no nowrap information,
  // the result will be unknown independently of the second operand.
  if (KnownOut.isUnknown() && !NSW)
    return;

  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
  KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut);
}

static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
                                const APInt &DemandedElts, KnownBits &Known,
                                KnownBits &Known2, unsigned Depth,
                                const SimplifyQuery &Q) {
  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);

  bool isKnownNegative = false;
  bool isKnownNonNegative = false;
  // If the multiplication is known not to overflow, compute the sign bit.
  if (NSW) {
    if (Op0 == Op1) {
      // The product of a number with itself is non-negative.
      isKnownNonNegative = true;
    } else {
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
      bool isKnownNegativeOp1 = Known.isNegative();
      bool isKnownNegativeOp0 = Known2.isNegative();
      // The product of two numbers with the same sign is non-negative.
      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
                           (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
      // The product of a negative number and a non-negative number is either
      // negative or zero.
      if (!isKnownNonNegative)
        isKnownNegative =
            (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
             Known2.isNonZero()) ||
            (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
    }
  }

  bool SelfMultiply = Op0 == Op1;
  if (SelfMultiply)
    SelfMultiply &=
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
  Known = KnownBits::mul(Known, Known2, SelfMultiply);

  // Only make use of no-wrap flags if we failed to compute the sign bit
  // directly. This matters if the multiplication always overflows, in
  // which case we prefer to follow the result of the direct computation,
  // though as the program is invoking undefined behaviour we can choose
  // whatever we like here.
  if (isKnownNonNegative && !Known.isNegative())
    Known.makeNonNegative();
  else if (isKnownNegative && !Known.isNonNegative())
    Known.makeNegative();
}

void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                             KnownBits &Known) {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumRanges = Ranges.getNumOperands() / 2;
  assert(NumRanges >= 1);

  Known.Zero.setAllBits();
  Known.One.setAllBits();

  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());

    // The first CommonPrefixBits of all values in Range are equal.
    unsigned CommonPrefixBits =
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
    Known.One &= UnsignedMax & Mask;
    Known.Zero &= ~UnsignedMax & Mask;
  }
}

static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
  SmallVector<const Value *, 16> WorkSet(1, I);
  SmallPtrSet<const Value *, 32> Visited;
  SmallPtrSet<const Value *, 16> EphValues;

  // The instruction defining an assumption's condition itself is always
  // considered ephemeral to that assumption (even if it has other
  // non-ephemeral users). See r246696's test case for an example.
  if (is_contained(I->operands(), E))
    return true;

  while (!WorkSet.empty()) {
    const Value *V = WorkSet.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    // If all uses of this value are ephemeral, then so is this value.
    if (llvm::all_of(V->users(),
                     [&](const User *U) { return EphValues.count(U); })) {
      if (V == E)
        return true;

      if (V == I || (isa<Instruction>(V) &&
                     !cast<Instruction>(V)->mayHaveSideEffects() &&
                     !cast<Instruction>(V)->isTerminator())) {
        EphValues.insert(V);
        if (const User *U = dyn_cast<User>(V))
          append_range(WorkSet, U->operands());
      }
    }
  }

  return false;
}

// Is this an intrinsic that cannot be speculated but also cannot trap?
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
    return CI->isAssumeLikeIntrinsic();

  return false;
}

bool llvm::isValidAssumeForContext(const Instruction *Inv,
                                   const Instruction *CxtI,
                                   const DominatorTree *DT) {
  // There are two restrictions on the use of an assume:
  //  1. The assume must dominate the context (or the control flow must
  //     reach the assume whenever it reaches the context).
  //  2. The context must not be in the assume's set of ephemeral values
  //     (otherwise we will use the assume to prove that the condition
  //     feeding the assume is trivially true, thus causing the removal of
  //     the assume).

  if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CxtI are in the same block, check if the assume (Inv) is
    // first in the BB.
    if (Inv->comesBefore(CxtI))
      return true;

    // Don't let an assume affect itself - this would cause the problems
    // `isEphemeralValueOf` is trying to prevent, and it would also make
    // the loop below go out of bounds.
    if (Inv == CxtI)
      return false;

    // The context comes first, but they're both in the same block.
    // Make sure there is nothing in between that might interrupt
    // the control flow, not even CxtI itself.
    // We limit the scan distance between the assume and its context
    // instruction to avoid a compile-time explosion. This limit is chosen
    // arbitrarily, so it can be adjusted if needed (could be turned into a
    // cl::opt).
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
      return false;

    return !isEphemeralValueOf(Inv, CxtI);
  }

  // Inv and CxtI are in different blocks.
  if (DT) {
    if (DT->dominates(Inv, CxtI))
      return true;
  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
    // We don't have a DT, but this trivially dominates.
    return true;
  }

  return false;
}

// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
// we still have enough information about `RHS` to conclude non-zero. For
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
// so the extra compile time may not be worth it, but possibly a second API
// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
  // v u> y implies v != 0.
  if (Pred == ICmpInst::ICMP_UGT)
    return true;

  // Special-case v != 0 to also handle v != null.
  if (Pred == ICmpInst::ICMP_NE)
    return match(RHS, m_Zero());

  // All other predicates - rely on generic ConstantRange handling.
  const APInt *C;
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
  if (match(RHS, m_APInt(C))) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
    return !TrueValues.contains(Zero);
  }

  auto *VC = dyn_cast<ConstantDataVector>(RHS);
  if (VC == nullptr)
    return false;

  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
       ++ElemIdx) {
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
        Pred, VC->getElementAsAPInt(ElemIdx));
    if (TrueValues.contains(Zero))
      return false;
  }
  return true;
}

static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
  // Use of assumptions is context-sensitive. If we don't have a context, we
  // cannot use them!
  if (!Q.AC || !Q.CxtI)
    return false;

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getFunction() == Q.CxtI->getFunction() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V &&
            (RK.AttrKind == Attribute::NonNull ||
             (RK.AttrKind == Attribute::Dereferenceable &&
              !NullPointerIsDefined(Q.CxtI->getFunction(),
                                    V->getType()->getPointerAddressSpace()))) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          return true;
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).
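    //
    // Illustrative example (not from the original source): an assumption like
    //   %c = icmp ugt i64 %v, 7
    //   call void @llvm.assume(i1 %c)
    // matches below with Pred == ICMP_UGT, and cmpExcludesZero then proves
    // that %v is non-zero.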

    Value *RHS;
    CmpInst::Predicate Pred;
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
    if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
      return false;

    if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
      return true;
  }

  return false;
}

static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
                                    Value *LHS, Value *RHS, KnownBits &Known,
                                    const SimplifyQuery &Q) {
  if (RHS->getType()->isPointerTy()) {
    // Handle comparison of pointer to null explicitly, as it will not be
    // covered by the m_APInt() logic below.
    if (LHS == V && match(RHS, m_Zero())) {
      switch (Pred) {
      case ICmpInst::ICMP_EQ:
        Known.setAllZero();
        break;
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_SGT:
        Known.makeNonNegative();
        break;
      case ICmpInst::ICMP_SLT:
        Known.makeNegative();
        break;
      default:
        break;
      }
    }
    return;
  }

  unsigned BitWidth = Known.getBitWidth();
  auto m_V =
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));

  const APInt *Mask, *C;
  uint64_t ShAmt;
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    // assume(V = C)
    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
      Known = Known.unionWith(KnownBits::makeConstant(*C));
      // assume(V & Mask = C)
    } else if (match(LHS, m_And(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // For one bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C & *Mask;
      Known.One |= *C & *Mask;
      // assume(V | Mask = C)
    } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) {
      // For zero bits in Mask, we can propagate bits from C to V.
      Known.Zero |= ~*C & ~*Mask;
      Known.One |= *C & ~*Mask;
      // assume(V ^ Mask = C)
    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
               match(RHS, m_APInt(C))) {
      // Equivalent to assume(V == Mask ^ C)
      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
      // assume(V << ShAmt = C)
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the right by ShAmt.
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      RHSKnown.Zero.lshrInPlace(ShAmt);
      RHSKnown.One.lshrInPlace(ShAmt);
      Known = Known.unionWith(RHSKnown);
      // assume(V >> ShAmt = C)
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
      // For those bits in RHS that are known, we can propagate them to known
      // bits in V shifted to the left by ShAmt.
      Known.Zero |= RHSKnown.Zero << ShAmt;
      Known.One |= RHSKnown.One << ShAmt;
    }
    break;
  case ICmpInst::ICMP_NE: {
    // assume (V & B != 0) where B is a power of 2
    const APInt *BPow2;
    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
      Known.One |= *BPow2;
    break;
  }
  default:
    const APInt *Offset = nullptr;
    if (match(LHS, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset)))) &&
        match(RHS, m_APInt(C))) {
      ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
      if (Offset)
        LHSRange = LHSRange.sub(*Offset);
      Known = Known.unionWith(LHSRange.toKnownBits());
    }
    break;
  }
}

void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
                                       unsigned Depth, const SimplifyQuery &Q) {
  if (!Q.CxtI)
    return;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      auto *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
      if (!Cmp)
        continue;

      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                                Cmp->getOperand(1), Known, Q);

      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownBitsFromCmp(V, Cmp->getInversePredicate(),
                                Cmp->getOperand(0), Cmp->getOperand(1), Known,
                                Q);
    }

    if (Known.hasConflict())
      Known.resetAll();
  }

  if (!Q.AC)
    return;

  unsigned BitWidth = Known.getBitWidth();

  // Note that the patterns below need to be kept in sync with the code
  // in AssumptionCache::updateAffectedValues.

  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
    if (!Elem.Assume)
      continue;

    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");

    if (Elem.Index != AssumptionCache::ExprResultIdx) {
      if (!V->getType()->isPointerTy())
        continue;
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
              *I, I->bundle_op_info_begin()[Elem.Index])) {
        if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
            isPowerOf2_64(RK.ArgValue) &&
            isValidAssumeForContext(I, Q.CxtI, Q.DT))
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
      }
      continue;
    }

    // Warning: This loop can end up being somewhat performance sensitive.
    // We're running this loop once for each value queried, resulting in a
    // runtime of ~O(#assumes * #values).

    Value *Arg = I->getArgOperand(0);

    if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllOnes();
      return;
    }
    if (match(Arg, m_Not(m_Specific(V))) &&
        isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
      assert(BitWidth == 1 && "assume operand is not i1?");
      (void)BitWidth;
      Known.setAllZero();
      return;
    }

    // The remaining tests are all recursive, so bail out if we hit the limit.
    if (Depth == MaxAnalysisRecursionDepth)
      continue;

    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
    if (!Cmp)
      continue;

    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0),
                            Cmp->getOperand(1), Known, Q);
  }

  // Conflicting assumption: Undefined behavior will occur on this execution
  // path.
  if (Known.hasConflict())
    Known.resetAll();
}

/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is a
/// pre-allocated temporary with the same bit width as Known and on return
/// contains the known bits of the shift value source. KF is an
/// operator-specific function that, given the known bits and a shift amount,
/// computes the implied known bits of the shift operator's result for that
/// shift amount. The results from calling KF are conservatively combined for
/// all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
    KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
  // To limit compile-time impact, only query isKnownNonZero() if we know at
  // least something about the shift amount.
  bool ShAmtNonZero =
      Known.isNonZero() ||
      (Known.getMaxValue().ult(Known.getBitWidth()) &&
       isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
  Known = KF(Known2, Known, ShAmtNonZero);
}

static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         unsigned Depth, const SimplifyQuery &Q) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that will clear all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
    // -1 but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded).
    // Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}

// Public so this can be used in `SimplifyDemandedUseBits`.
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
                                             const KnownBits &KnownLHS,
                                             const KnownBits &KnownRHS,
                                             unsigned Depth,
                                             const SimplifyQuery &SQ) {
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
  APInt DemandedElts =
      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);

  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
                                  SQ);
}

ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than vscale width, result is always poison.
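  // (Illustrative, not from the original source: vscale_range(300, 400)
  // queried with BitWidth == 8 cannot represent 300, since bit_width(300) is
  // 9, so the check below yields an empty range.)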
  if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
    return ConstantRange::getEmpty(BitWidth);

  APInt Min(BitWidth, AttrMin);
  std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
  if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
    return ConstantRange(Min, APInt::getZero(BitWidth));

  return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}

static void computeKnownBitsFromOperator(const Operator *I,
                                         const APInt &DemandedElts,
                                         KnownBits &Known, unsigned Depth,
                                         const SimplifyQuery &Q) {
  unsigned BitWidth = Known.getBitWidth();

  KnownBits Known2(BitWidth);
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    break;
  case Instruction::And:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Or:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Xor:
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);

    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
    break;
  case Instruction::Mul: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
                        Known, Known2, Depth, Q);
    break;
  }
  case Instruction::UDiv: {
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known =
        KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::SDiv: {
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known =
        KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
    break;
  }
  case Instruction::Select: {
    const Value *LHS = nullptr, *RHS = nullptr;
    SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
    if (SelectPatternResult::isMinOrMax(SPF)) {
      computeKnownBits(RHS, Known, Depth + 1, Q);
      computeKnownBits(LHS, Known2, Depth + 1, Q);
      switch (SPF) {
      default:
        llvm_unreachable("Unhandled select pattern flavor!");
      case SPF_SMAX:
        Known = KnownBits::smax(Known, Known2);
        break;
      case SPF_SMIN:
        Known = KnownBits::smin(Known, Known2);
        break;
      case SPF_UMAX:
        Known = KnownBits::umax(Known, Known2);
        break;
      case SPF_UMIN:
        Known = KnownBits::umin(Known, Known2);
        break;
      }
      break;
    }

    computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);

    if (SPF == SPF_ABS) {
      // RHS from matchSelectPattern returns the negation part of abs pattern.
      // If the negate has an NSW flag we can assume the sign bit of the result
      // will be 0 because that makes abs(INT_MIN) undefined.
      if (match(RHS, m_Neg(m_Specific(LHS))) &&
          Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS)))
        Known.Zero.setSignBit();
    }

    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    break; // Can't work with floating point.
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case Instruction::ZExt:
  case Instruction::Trunc: {
    Type *SrcTy = I->getOperand(0)->getType();

    unsigned SrcBitWidth;
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
    // which fall through here.
    Type *ScalarTy = SrcTy->getScalarType();
    SrcBitWidth = ScalarTy->isPointerTy() ?
                      Q.DL.getPointerTypeSizeInBits(ScalarTy) :
                      Q.DL.getTypeSizeInBits(ScalarTy);

    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.anyextOrTrunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
        Inst && Inst->hasNonNeg() && !Known.isNegative())
      Known.makeNonNegative();
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case Instruction::BitCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    if (SrcTy->isIntOrPtrTy() &&
        // TODO: For now, not handling conversions like:
        // (bitcast i64 %x to <2 x i32>)
        !I->getType()->isVectorTy()) {
      computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
      break;
    }

    // Handle cast from vector integer type to scalar or vector integer.
    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
    if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
        !I->getType()->isIntOrIntVectorTy() ||
        isa<ScalableVectorType>(I->getType()))
      break;

    // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> i24
    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
    if (BitWidth % SubBitWidth == 0) {
      // Known bits are automatically intersected across demanded elements of a
      // vector. So for example, if a bit is computed as known zero, it must be
      // zero across all demanded elements of the vector.
      //
      // For this bitcast, each demanded element of the output is sub-divided
      // across a set of smaller vector elements in the source vector. To get
      // the known bits for an entire element of the output, compute the known
      // bits for each sub-element sequentially. This is done by shifting the
      // one-set-bit demanded elements parameter across the sub-elements for
      // consecutive calls to computeKnownBits. We are using the demanded
      // elements parameter as a mask operator.
      //
      // The known bits of each sub-element are then inserted into place
      // (dependent on endian) to form the full result of known bits.
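      //
      // Illustrative example (not from the original source): for a
      // little-endian bitcast <4 x i8> -> <2 x i16>, demanded element 0 of
      // the i16 output pulls known bits from i8 sub-elements 0 and 1, with
      // sub-element 1 inserted at bit offset 8.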
      unsigned NumElts = DemandedElts.getBitWidth();
      unsigned SubScale = BitWidth / SubBitWidth;
      APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i])
          SubDemandedElts.setBit(i * SubScale);
      }

      KnownBits KnownSrc(SubBitWidth);
      for (unsigned i = 0; i != SubScale; ++i) {
        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
                         Depth + 1, Q);
        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
      }
    }
    break;
  }
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    Known = Known.trunc(SrcBitWidth);
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = Known.sext(BitWidth);
    break;
  }
  case Instruction::Shl: {
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                         bool ShAmtNonZero) {
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Trailing zeros of a left-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setLowBits(C->countr_zero());
    break;
  }
  case Instruction::LShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    // Leading zeros of a right-shifted constant never decrease.
    const APInt *C;
    if (match(I->getOperand(0), m_APInt(C)))
      Known.Zero.setHighBits(C->countl_zero());
    break;
  }
  case Instruction::AShr: {
    auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
                 bool ShAmtNonZero) {
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero);
    };
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
                                      KF);
    break;
  }
  case Instruction::Sub: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::Add: {
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
                           DemandedElts, Known, Known2, Depth, Q);
    break;
  }
  case Instruction::SRem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::srem(Known, Known2);
    break;

  case Instruction::URem:
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
    Known = KnownBits::urem(Known, Known2);
    break;
  case Instruction::Alloca:
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
    break;
  case Instruction::GetElementPtr: {
    // Analyze all of the subscripts of this getelementptr instruction
    // to determine if we can prove known low zero bits.
    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
    // Accumulate the constant indices in a separate variable
    // to minimize the number of calls to computeForAddSub.
    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

    gep_type_iterator GTI = gep_type_begin(I);
    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
      // TrailZ can only become smaller, short-circuit if we hit zero.
      if (Known.isUnknown())
        break;

      Value *Index = I->getOperand(i);

      // Handle case when index is zero.
      Constant *CIndex = dyn_cast<Constant>(Index);
      if (CIndex && CIndex->isZeroValue())
        continue;

      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Handle struct member offset arithmetic.

        assert(CIndex &&
               "Access to structure field must be known at compile time");

        if (CIndex->getType()->isVectorTy())
          Index = CIndex->getSplatValue();

        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
        const StructLayout *SL = Q.DL.getStructLayout(STy);
        uint64_t Offset = SL->getElementOffset(Idx);
        AccConstIndices += Offset;
        continue;
      }

      // Handle array index arithmetic.
      Type *IndexedTy = GTI.getIndexedType();
      if (!IndexedTy->isSized()) {
        Known.resetAll();
        break;
      }

      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
      KnownBits IndexBits(IndexBitWidth);
      computeKnownBits(Index, IndexBits, Depth + 1, Q);
      TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
      KnownBits ScalingFactor(IndexBitWidth);
      // Multiply by current sizeof type.
      // &A[i] == A + i * sizeof(*A[i]).
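      // Illustrative example (not from the original source): indexing into an
      // array of i32 (sizeof == 4) scales the index by 4, so the offset always
      // has at least two trailing zero bits, which the multiply below records.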
      if (IndexTypeSize.isScalable()) {
        // For scalable types the only thing we know about sizeof is
        // that this is a multiple of the minimum size.
        ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
      } else if (IndexBits.isConstant()) {
        APInt IndexConst = IndexBits.getConstant();
        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
        IndexConst *= ScalingFactor;
        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
        continue;
      } else {
        ScalingFactor =
            KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
      }
      IndexBits = KnownBits::mul(IndexBits, ScalingFactor);

      // If the offsets have a different width from the pointer, according
      // to the language reference we need to sign-extend or truncate them
      // to the width of the pointer.
      IndexBits = IndexBits.sextOrTrunc(BitWidth);

      // Note that inbounds does *not* guarantee nsw for the addition, as only
      // the offset is signed, while the base address is unsigned.
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, Known, IndexBits);
    }
    if (!Known.isUnknown() && !AccConstIndices.isZero()) {
      KnownBits Index = KnownBits::makeConstant(AccConstIndices);
      Known = KnownBits::computeForAddSub(
          /*Add=*/true, /*NSW=*/false, Known, Index);
    }
    break;
  }
  case Instruction::PHI: {
    const PHINode *P = cast<PHINode>(I);
    BinaryOperator *BO = nullptr;
    Value *R = nullptr, *L = nullptr;
    if (matchSimpleRecurrence(P, BO, R, L)) {
      // Handle the case of a simple two-predecessor recurrence PHI.
      // There's a lot more that could theoretically be done here, but
      // this is sufficient to catch some interesting cases.
      unsigned Opcode = BO->getOpcode();

      // If this is a shift recurrence, we know the bits being shifted in.
      // We can combine that with information about the start value of the
      // recurrence to conclude facts about the result.
      if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
           Opcode == Instruction::Shl) &&
          BO->getOperand(0) == I) {

        // We have matched a recurrence of the form:
        // %iv = [R, %entry], [%iv.next, %backedge]
        // %iv.next = shift_op %iv, L

        // Recurse with the phi context to avoid concern about whether facts
        // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context. If warranted, explore and
        // add sufficient tests to cover.
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P;
        computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
        switch (Opcode) {
        case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
          break;
        case Instruction::LShr:
          // A lshr recurrence will preserve the leading zeros of the
          // start value
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          break;
        case Instruction::AShr:
          // An ashr recurrence will extend the initial sign bit
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
          Known.One.setHighBits(Known2.countMinLeadingOnes());
          break;
        }
      }

      // Check for operations that have the property that if
      // both their operands have low zero bits, the result
      // will have low zero bits.
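      // Illustrative example (not from the original source): in a loop like
      //   %iv = phi i32 [ 0, %entry ], [ %iv.next, %body ]
      //   %iv.next = add nsw i32 %iv, 8
      // the start value and the step both have at least three trailing zero
      // bits, so the recurrence keeps them.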
      if (Opcode == Instruction::Add ||
          Opcode == Instruction::Sub ||
          Opcode == Instruction::And ||
          Opcode == Instruction::Or ||
          Opcode == Instruction::Mul) {
        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;

        unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
        Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();

        // Ok, we have a PHI of the form L op= R. Check for low
        // zero bits.
        RecQ.CxtI = RInst;
        computeKnownBits(R, Known2, Depth + 1, RecQ);

        // We need to take the minimum number of known bits
        KnownBits Known3(BitWidth);
        RecQ.CxtI = LInst;
        computeKnownBits(L, Known3, Depth + 1, RecQ);

        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
                                       Known3.countMinTrailingZeros()));

        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
        if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
          // If the initial value of the recurrence is nonnegative, and we are
          // adding a nonnegative number with nsw, the result can only be
          // nonnegative or poison regardless of the number of times we
          // execute the add in the phi recurrence. If the initial value is
          // negative and we are adding a negative number with nsw, the result
          // can only be negative or poison. Similar arguments apply to sub
          // and mul.
          //
          // (add non-negative, non-negative) --> non-negative
          // (add negative, negative) --> negative
          if (Opcode == Instruction::Add) {
            if (Known2.isNonNegative() && Known3.isNonNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNegative())
              Known.makeNegative();
          }

          // (sub nsw non-negative, negative) --> non-negative
          // (sub nsw negative, non-negative) --> negative
          else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
            if (Known2.isNonNegative() && Known3.isNegative())
              Known.makeNonNegative();
            else if (Known2.isNegative() && Known3.isNonNegative())
              Known.makeNegative();
          }

          // (mul nsw non-negative, non-negative) --> non-negative
          else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
                   Known3.isNonNegative())
            Known.makeNonNegative();
        }

        break;
      }
    }

    // Unreachable blocks may have zero-operand PHI nodes.
    if (P->getNumIncomingValues() == 0)
      break;

    // Otherwise take the intersection of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
    if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value references itself.
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
        break;

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
        Value *IncValue = P->getIncomingValue(u);
        // Skip direct self references.
        if (IncValue == P)
          continue;

        // Change the context instruction to the "edge" that flows into the
        // phi. This is important because that is where the value is actually
        // "evaluated" even though it is used later somewhere else. (see also
        // D69571).
        SimplifyQuery RecQ = Q;
        RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();

        Known2 = KnownBits(BitWidth);

        // Recurse, but cap the recursion to one level, because we don't
        // want to waste time spinning around in loops.
        // TODO: See if we can base the recursion limiter on the number of
        // incoming phi edges so we don't overly clamp the analysis.
        computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);

        // See if we can further use a conditional branch into the phi
        // to help us determine the range of the value.
        if (!Known2.isConstant()) {
          ICmpInst::Predicate Pred;
          const APInt *RHSC;
          BasicBlock *TrueSucc, *FalseSucc;
          // TODO: Use RHS Value and compute range from its known bits.
          if (match(RecQ.CxtI,
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
            // Check for cases of duplicate successors.
            if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
              // If we're using the false successor, invert the predicate.
              if (FalseSucc == P->getParent())
                Pred = CmpInst::getInversePredicate(Pred);
              // Get the knownbits implied by the incoming phi condition.
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code (it
              // is impossible for us to reach this BB based on the icmp).
              if (KnownUnion.hasConflict()) {
                // No reason to continue analyzing in a known dead region, so
                // just resetAll and break. This will cause us to also exit the
                // outer loop.
                Known.resetAll();
                break;
              }
              Known2 = KnownUnion;
            }
          }
        }

        Known = Known.intersectWith(Known2);
        // If all bits have been ruled out, there's no need to check
        // more operands.
        if (Known.isUnknown())
          break;
      }
    }
    break;
  }
  case Instruction::Call:
  case Instruction::Invoke:
    // If range metadata is attached to this call, set known bits from that,
    // and then intersect with known bits based on other properties of the
    // function.
    if (MDNode *MD =
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
      computeKnownBitsFromRangeMetadata(*MD, Known);
    if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
      if (RV->getType() == I->getType()) {
        computeKnownBits(RV, Known2, Depth + 1, Q);
        Known = Known.unionWith(Known2);
      }
    }
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::abs: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
        Known = Known2.abs(IntMinIsPoison);
        break;
      }
      case Intrinsic::bitreverse:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.reverseBits();
        Known.One |= Known2.One.reverseBits();
        break;
      case Intrinsic::bswap:
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
        Known.Zero |= Known2.Zero.byteSwap();
        Known.One |= Known2.One.byteSwap();
        break;
      case Intrinsic::ctlz: {
        computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
        // If we have a known 1, its position is our upper bound.
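        // Illustrative example (not from the original source): for an i32
        // input known to have bit 28 set, countMaxLeadingZeros() is 3, so the
        // ctlz result fits in bit_width(3) == 2 bits and all higher result
        // bits become known zero below.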
1495         unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1496         // If this call is poison for 0 input, the result will be less than 2^n.
1497         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1498           PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1499         unsigned LowBits = llvm::bit_width(PossibleLZ);
1500         Known.Zero.setBitsFrom(LowBits);
1501         break;
1502       }
1503       case Intrinsic::cttz: {
1504         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1505         // If we have a known 1, its position is our upper bound.
1506         unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1507         // If this call is poison for 0 input, the result will be less than 2^n.
1508         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1509           PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1510         unsigned LowBits = llvm::bit_width(PossibleTZ);
1511         Known.Zero.setBitsFrom(LowBits);
1512         break;
1513       }
1514       case Intrinsic::ctpop: {
1515         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1516         // We can bound the space the count needs. Also, bits known to be zero
1517         // can't contribute to the population.
1518         unsigned BitsPossiblySet = Known2.countMaxPopulation();
1519         unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1520         Known.Zero.setBitsFrom(LowBits);
1521         // TODO: we could bound KnownOne using the lower bound on the number
1522         // of bits which might be set provided by popcnt KnownOne2.
1523         break;
1524       }
1525       case Intrinsic::fshr:
1526       case Intrinsic::fshl: {
1527         const APInt *SA;
1528         if (!match(I->getOperand(2), m_APInt(SA)))
1529           break;
1530 
1531         // Normalize to funnel shift left.
1532         uint64_t ShiftAmt = SA->urem(BitWidth);
1533         if (II->getIntrinsicID() == Intrinsic::fshr)
1534           ShiftAmt = BitWidth - ShiftAmt;
1535 
1536         KnownBits Known3(BitWidth);
1537         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1538         computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
1539 
1540         Known.Zero =
1541             Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1542         Known.One =
1543             Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1544         break;
1545       }
1546       case Intrinsic::uadd_sat:
1547         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1548         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1549         Known = KnownBits::uadd_sat(Known, Known2);
1550         break;
1551       case Intrinsic::usub_sat:
1552         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1553         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1554         Known = KnownBits::usub_sat(Known, Known2);
1555         break;
1556       case Intrinsic::sadd_sat:
1557         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1558         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1559         Known = KnownBits::sadd_sat(Known, Known2);
1560         break;
1561       case Intrinsic::ssub_sat:
1562         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1563         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1564         Known = KnownBits::ssub_sat(Known, Known2);
1565         break;
1566       case Intrinsic::umin:
1567         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1568         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1569         Known = KnownBits::umin(Known, Known2);
1570         break;
1571       case Intrinsic::umax:
1572         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1573         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1574         Known = KnownBits::umax(Known, Known2);
1575         break;
1576       case Intrinsic::smin:
1577         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1578         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1579         Known = KnownBits::smin(Known, Known2);
1580         break;
1581       case Intrinsic::smax:
1582         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1583         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1584         Known = KnownBits::smax(Known, Known2);
1585         break;
1586       case Intrinsic::ptrmask: {
1587         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1588 
1589         const Value *Mask = I->getOperand(1);
1590         Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1591         computeKnownBits(Mask, Known2, Depth + 1, Q);
1592         // TODO: 1-extend would be more precise.
1593         Known &= Known2.anyextOrTrunc(BitWidth);
1594         break;
1595       }
1596       case Intrinsic::x86_sse42_crc32_64_64:
1597         Known.Zero.setBitsFrom(32);
1598         break;
1599       case Intrinsic::riscv_vsetvli:
1600       case Intrinsic::riscv_vsetvlimax:
1601         // Assume that VL output is <= 65536.
1602         // TODO: Take SEW and LMUL into account.
1603         if (BitWidth > 17)
1604           Known.Zero.setBitsFrom(17);
1605         break;
1606       case Intrinsic::vscale: {
1607         if (!II->getParent() || !II->getFunction())
1608           break;
1609 
1610         Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1611         break;
1612       }
1613       }
1614     }
1615     break;
1616   case Instruction::ShuffleVector: {
1617     auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1618     // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1619     if (!Shuf) {
1620       Known.resetAll();
1621       return;
1622     }
1623     // For undef elements, we don't know anything about the common state of
1624     // the shuffle result.
1625     APInt DemandedLHS, DemandedRHS;
1626     if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1627       Known.resetAll();
1628       return;
1629     }
1630     Known.One.setAllBits();
1631     Known.Zero.setAllBits();
1632     if (!!DemandedLHS) {
1633       const Value *LHS = Shuf->getOperand(0);
1634       computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1635       // If we don't know any bits, early out.
1636       if (Known.isUnknown())
1637         break;
1638     }
1639     if (!!DemandedRHS) {
1640       const Value *RHS = Shuf->getOperand(1);
1641       computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1642       Known = Known.intersectWith(Known2);
1643     }
1644     break;
1645   }
1646   case Instruction::InsertElement: {
1647     if (isa<ScalableVectorType>(I->getType())) {
1648       Known.resetAll();
1649       return;
1650     }
1651     const Value *Vec = I->getOperand(0);
1652     const Value *Elt = I->getOperand(1);
1653     auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
1654     // Early out if the index is non-constant or out-of-range.
1655     unsigned NumElts = DemandedElts.getBitWidth();
1656     if (!CIdx || CIdx->getValue().uge(NumElts)) {
1657       Known.resetAll();
1658       return;
1659     }
1660     Known.One.setAllBits();
1661     Known.Zero.setAllBits();
1662     unsigned EltIdx = CIdx->getZExtValue();
1663     // Do we demand the inserted element?
1664     if (DemandedElts[EltIdx]) {
1665       computeKnownBits(Elt, Known, Depth + 1, Q);
1666       // If we don't know any bits, early out.
1667       if (Known.isUnknown())
1668         break;
1669     }
1670     // We don't need the base vector element that has been inserted.
1671     APInt DemandedVecElts = DemandedElts;
1672     DemandedVecElts.clearBit(EltIdx);
1673     if (!!DemandedVecElts) {
1674       computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
1675       Known = Known.intersectWith(Known2);
1676     }
1677     break;
1678   }
1679   case Instruction::ExtractElement: {
1680     // Look through extract element. If the index is non-constant or
1681     // out-of-range, demand all elements; otherwise, just the extracted element.
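    // (Editorial illustration: for "extractelement <4 x i32> %v, i64 2" the
    // demanded set computed below is 0b0100, so only lane 2 of %v is
    // analyzed.)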
1682     const Value *Vec = I->getOperand(0);
1683     const Value *Idx = I->getOperand(1);
1684     auto *CIdx = dyn_cast<ConstantInt>(Idx);
1685     if (isa<ScalableVectorType>(Vec->getType())) {
1686       // FIXME: there's probably *something* we can do with scalable vectors
1687       Known.resetAll();
1688       break;
1689     }
1690     unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1691     APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1692     if (CIdx && CIdx->getValue().ult(NumElts))
1693       DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1694     computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
1695     break;
1696   }
1697   case Instruction::ExtractValue:
1698     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1699       const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1700       if (EVI->getNumIndices() != 1) break;
1701       if (EVI->getIndices()[0] == 0) {
1702         switch (II->getIntrinsicID()) {
1703         default: break;
1704         case Intrinsic::uadd_with_overflow:
1705         case Intrinsic::sadd_with_overflow:
1706           computeKnownBitsAddSub(true, II->getArgOperand(0),
1707                                  II->getArgOperand(1), false, DemandedElts,
1708                                  Known, Known2, Depth, Q);
1709           break;
1710         case Intrinsic::usub_with_overflow:
1711         case Intrinsic::ssub_with_overflow:
1712           computeKnownBitsAddSub(false, II->getArgOperand(0),
1713                                  II->getArgOperand(1), false, DemandedElts,
1714                                  Known, Known2, Depth, Q);
1715           break;
1716         case Intrinsic::umul_with_overflow:
1717         case Intrinsic::smul_with_overflow:
1718           computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1719                               DemandedElts, Known, Known2, Depth, Q);
1720           break;
1721         }
1722       }
1723     }
1724     break;
1725   case Instruction::Freeze:
1726     if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
1727                                   Depth + 1))
1728       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1729     break;
1730   }
1731 }
1732 
1733 /// Determine which bits of V are known to be either zero or one and return
1734 /// them.
1735 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
1736                                  unsigned Depth, const SimplifyQuery &Q) {
1737   KnownBits Known(getBitWidth(V->getType(), Q.DL));
1738   ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
1739   return Known;
1740 }
1741 
1742 /// Determine which bits of V are known to be either zero or one and return
1743 /// them.
1744 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
1745                                  const SimplifyQuery &Q) {
1746   KnownBits Known(getBitWidth(V->getType(), Q.DL));
1747   computeKnownBits(V, Known, Depth, Q);
1748   return Known;
1749 }
1750 
1751 /// Determine which bits of V are known to be either zero or one and return
1752 /// them in the Known bit set.
1753 ///
1754 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
1755 /// we cannot optimize based on the assumption that it is zero without changing
1756 /// it to be an explicit zero. If we don't change it to zero, other code could
1757 /// be optimized based on the contradictory assumption that it is non-zero.
1758 /// Because instcombine aggressively folds operations with undef args anyway,
1759 /// this won't lose us code quality.
1760 ///
1761 /// This function is defined on values with integer type, values with pointer
1762 /// type, and vectors of integers. In the case where V is a vector, the known
1763 /// zero and known one values are the same width as the vector element, and a
1764 /// bit is set only if it is true for all of the demanded elements in the
1765 /// vector specified by DemandedElts.
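/// For example (editorial illustration): when querying a <4 x i32> value with
/// DemandedElts = 0b0101, a bit is reported as known only if it is known, with
/// the same polarity, in both element 0 and element 2; lanes 1 and 3 place no
/// constraint on the result.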
1766 void computeKnownBits(const Value *V, const APInt &DemandedElts,
1767                       KnownBits &Known, unsigned Depth,
1768                       const SimplifyQuery &Q) {
1769   if (!DemandedElts) {
1770     // No demanded elts, better to assume we don't know anything.
1771     Known.resetAll();
1772     return;
1773   }
1774 
1775   assert(V && "No Value?");
1776   assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1777 
1778 #ifndef NDEBUG
1779   Type *Ty = V->getType();
1780   unsigned BitWidth = Known.getBitWidth();
1781 
1782   assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
1783          "Not integer or pointer type!");
1784 
1785   if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
1786     assert(
1787         FVTy->getNumElements() == DemandedElts.getBitWidth() &&
1788         "DemandedElt width should equal the fixed vector number of elements");
1789   } else {
1790     assert(DemandedElts == APInt(1, 1) &&
1791            "DemandedElt width should be 1 for scalars or scalable vectors");
1792   }
1793 
1794   Type *ScalarTy = Ty->getScalarType();
1795   if (ScalarTy->isPointerTy()) {
1796     assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
1797            "V and Known should have same BitWidth");
1798   } else {
1799     assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
1800            "V and Known should have same BitWidth");
1801   }
1802 #endif
1803 
1804   const APInt *C;
1805   if (match(V, m_APInt(C))) {
1806     // We know all of the bits for a scalar constant or a splat vector constant!
1807     Known = KnownBits::makeConstant(*C);
1808     return;
1809   }
1810   // Null and aggregate-zero are all-zeros.
1811   if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1812     Known.setAllZero();
1813     return;
1814   }
1815   // Handle a constant vector by taking the intersection of the known bits of
1816   // each element.
1817   if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
1818     assert(!isa<ScalableVectorType>(V->getType()));
1819     // We know that CDV must be a vector of integers. Take the intersection of
1820     // each element.
1821     Known.Zero.setAllBits(); Known.One.setAllBits();
1822     for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
1823       if (!DemandedElts[i])
1824         continue;
1825       APInt Elt = CDV->getElementAsAPInt(i);
1826       Known.Zero &= ~Elt;
1827       Known.One &= Elt;
1828     }
1829     if (Known.hasConflict())
1830       Known.resetAll();
1831     return;
1832   }
1833 
1834   if (const auto *CV = dyn_cast<ConstantVector>(V)) {
1835     assert(!isa<ScalableVectorType>(V->getType()));
1836     // We know that CV must be a vector of integers. Take the intersection of
1837     // each element.
1838     Known.Zero.setAllBits(); Known.One.setAllBits();
1839     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1840       if (!DemandedElts[i])
1841         continue;
1842       Constant *Element = CV->getAggregateElement(i);
1843       if (isa<PoisonValue>(Element))
1844         continue;
1845       auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
1846       if (!ElementCI) {
1847         Known.resetAll();
1848         return;
1849       }
1850       const APInt &Elt = ElementCI->getValue();
1851       Known.Zero &= ~Elt;
1852       Known.One &= Elt;
1853     }
1854     if (Known.hasConflict())
1855       Known.resetAll();
1856     return;
1857   }
1858 
1859   // Start out not knowing anything.
1860   Known.resetAll();
1861 
1862   // We can't imply anything about undefs.
1863   if (isa<UndefValue>(V))
1864     return;
1865 
1866   // There's no point in looking through other users of ConstantData for
1867   // assumptions. Confirm that we've handled them all.
1868   assert(!isa<ConstantData>(V) && "Unhandled constant data!");
1869 
1870   // All recursive calls that increase depth must come after this.
1871   if (Depth == MaxAnalysisRecursionDepth)
1872     return;
1873 
1874   // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
1875   // the bits of its aliasee.
1876   if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1877     if (!GA->isInterposable())
1878       computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
1879     return;
1880   }
1881 
1882   if (const Operator *I = dyn_cast<Operator>(V))
1883     computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
1884   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
1885     if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
1886       Known = CR->toKnownBits();
1887   }
1888 
1889   // Aligned pointers have trailing zeros - refine the Known.Zero set.
1890   if (isa<PointerType>(V->getType())) {
1891     Align Alignment = V->getPointerAlignment(Q.DL);
1892     Known.Zero.setLowBits(Log2(Alignment));
1893   }
1894 
1895   // computeKnownBitsFromContext strictly refines Known.
1896   // Therefore, we run it after computeKnownBitsFromOperator.
1897 
1898   // Check whether we can determine known bits from context such as assumes.
1899   computeKnownBitsFromContext(V, Known, Depth, Q);
1900 
1901   assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1902 }
1903 
1904 /// Try to detect a recurrence in which the value of the induction variable
1905 /// is always a power of two (or zero).
1906 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
1907                                    unsigned Depth, SimplifyQuery &Q) {
1908   BinaryOperator *BO = nullptr;
1909   Value *Start = nullptr, *Step = nullptr;
1910   if (!matchSimpleRecurrence(PN, BO, Start, Step))
1911     return false;
1912 
1913   // The initial value must be a power of two.
1914   for (const Use &U : PN->operands()) {
1915     if (U.get() == Start) {
1916       // The initial value comes from a different BB, so we need to adjust the
1917       // context instruction for the analysis.
1918       Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
1919       if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
1920         return false;
1921     }
1922   }
1923 
1924   // Except for Mul, the induction variable must be on the left side of the
1925   // increment expression, otherwise its value can be arbitrary.
1926   if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
1927     return false;
1928 
1929   Q.CxtI = BO->getParent()->getTerminator();
1930   switch (BO->getOpcode()) {
1931   case Instruction::Mul:
1932     // Power of two is closed under multiplication.
1933     return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
1934             Q.IIQ.hasNoSignedWrap(BO)) &&
1935            isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
1936   case Instruction::SDiv:
1937     // The start value must not be the signmask for signed division, so simply
1938     // being a power of two is not sufficient, and it has to be a constant.
1939     if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
1940       return false;
1941     [[fallthrough]];
1942   case Instruction::UDiv:
1943     // The divisor must be a power of two.
1944     // If OrZero is false, we cannot guarantee the induction variable is
1945     // non-zero after the division (same for Shr), unless it is exact division.
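    // (Editorial illustration: start = 8 with divisor step 4 produces
    // 8, 2, 0, ... -- every value stays a power of two or zero, so without
    // `exact` we can only conclude power-of-two-or-zero.)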
1946     return (OrZero || Q.IIQ.isExact(BO)) &&
1947            isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
1948   case Instruction::Shl:
1949     return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
1950   case Instruction::AShr:
1951     if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
1952       return false;
1953     [[fallthrough]];
1954   case Instruction::LShr:
1955     return OrZero || Q.IIQ.isExact(BO);
1956   default:
1957     return false;
1958   }
1959 }
1960 
1961 /// Return true if the given value is known to have exactly one
1962 /// bit set when defined. For vectors return true if every element is known to
1963 /// be a power of two when defined. Supports values with integer or pointer
1964 /// types and vectors of integers.
1965 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
1966                             const SimplifyQuery &Q) {
1967   assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1968 
1969   if (isa<Constant>(V))
1970     return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
1971 
1972   // i1 is by definition a power of 2 or zero.
1973   if (OrZero && V->getType()->getScalarSizeInBits() == 1)
1974     return true;
1975 
1976   auto *I = dyn_cast<Instruction>(V);
1977   if (!I)
1978     return false;
1979 
1980   if (Q.CxtI && match(V, m_VScale())) {
1981     const Function *F = Q.CxtI->getFunction();
1982     // The vscale_range indicates vscale is a power-of-two.
1983     return F->hasFnAttribute(Attribute::VScaleRange);
1984   }
1985 
1986   // 1 << X is clearly a power of two if the one is not shifted off the end. If
1987   // it is shifted off the end then the result is undefined.
1988   if (match(I, m_Shl(m_One(), m_Value())))
1989     return true;
1990 
1991   // (signmask) >>l X is clearly a power of two if the one is not shifted off
1992   // the bottom. If it is shifted off the bottom then the result is undefined.
1993   if (match(I, m_LShr(m_SignMask(), m_Value())))
1994     return true;
1995 
1996   // The remaining tests are all recursive, so bail out if we hit the limit.
1997   if (Depth++ == MaxAnalysisRecursionDepth)
1998     return false;
1999 
2000   switch (I->getOpcode()) {
2001   case Instruction::ZExt:
2002     return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2003   case Instruction::Trunc:
2004     return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2005   case Instruction::Shl:
2006     if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2007       return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2008     return false;
2009   case Instruction::LShr:
2010     if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2011       return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2012     return false;
2013   case Instruction::UDiv:
2014     if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2015       return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2016     return false;
2017   case Instruction::Mul:
2018     return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2019            isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2020            (OrZero || isKnownNonZero(I, Depth, Q));
2021   case Instruction::And:
2022     // A power of two and'd with anything is a power of two or zero.
2023     if (OrZero &&
2024         (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2025          isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2026       return true;
2027     // X & (-X) is always a power of two or zero.
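    // (Editorial illustration:      x = 0b01101000
    //                              -x = 0b10011000
    //                          x & -x = 0b00001000, the lowest set bit of x.)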
2028     if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2029         match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2030       return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q);
2031     return false;
2032   case Instruction::Add: {
2033     // Adding a power-of-two or zero to the same power-of-two or zero yields
2034     // either the original power-of-two, a larger power-of-two, or zero.
2035     const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2036     if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2037         Q.IIQ.hasNoSignedWrap(VOBO)) {
2038       if (match(I->getOperand(0),
2039                 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2040           isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2041         return true;
2042       if (match(I->getOperand(1),
2043                 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2044           isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2045         return true;
2046 
2047       unsigned BitWidth = V->getType()->getScalarSizeInBits();
2048       KnownBits LHSBits(BitWidth);
2049       computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2050 
2051       KnownBits RHSBits(BitWidth);
2052       computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2053       // If i8 V is a power of two or zero:
2054       //   ZeroBits: 1 1 1 0 1 1 1 1
2055       //  ~ZeroBits: 0 0 0 1 0 0 0 0
2056       if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2057         // If OrZero isn't set, we cannot give back a zero result.
2058         // Make sure either the LHS or RHS has a bit set.
2059         if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2060           return true;
2061     }
2062     return false;
2063   }
2064   case Instruction::Select:
2065     return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2066            isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2067   case Instruction::PHI: {
2068     // A PHI node is a power of two if all incoming values are powers of two,
2069     // or if it is an induction variable whose value is a power of two at each
2070     // step.
2071     auto *PN = cast<PHINode>(I);
2072     SimplifyQuery RecQ = Q;
2073 
2074     // Check if it is an induction variable that is always a power of two.
2075     if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2076       return true;
2077 
2078     // Recursively check all incoming values. Limit recursion to 2 levels, so
2079     // that search complexity is limited to number of operands^2.
2080     unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2081     return llvm::all_of(PN->operands(), [&](const Use &U) {
2082       // The value is a power of 2 if it comes from the PHI node itself by induction.
2083       if (U.get() == PN)
2084         return true;
2085 
2086       // Change the context instruction to the incoming block where it is
2087       // evaluated.
2088       RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2089       return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2090     });
2091   }
2092   case Instruction::Invoke:
2093   case Instruction::Call: {
2094     if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2095       switch (II->getIntrinsicID()) {
2096       case Intrinsic::umax:
2097       case Intrinsic::smax:
2098       case Intrinsic::umin:
2099       case Intrinsic::smin:
2100         return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2101                isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2102       // bswap/bitreverse just move bits around, but don't change any 1s/0s,
2103       // and thus don't change pow2/non-pow2 status.
2104       case Intrinsic::bitreverse:
2105       case Intrinsic::bswap:
2106         return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2107       case Intrinsic::fshr:
2108       case Intrinsic::fshl:
2109         // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2110         if (II->getArgOperand(0) == II->getArgOperand(1))
2111           return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2112         break;
2113       default:
2114         break;
2115       }
2116     }
2117     return false;
2118   }
2119   default:
2120     return false;
2121   }
2122 }
2123 
2124 /// Test whether a GEP's result is known to be non-null.
2125 ///
2126 /// Uses properties inherent in a GEP to try to determine whether it is known
2127 /// to be non-null.
2128 ///
2129 /// Currently this routine does not support vector GEPs.
2130 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2131                               const SimplifyQuery &Q) {
2132   const Function *F = nullptr;
2133   if (const Instruction *I = dyn_cast<Instruction>(GEP))
2134     F = I->getFunction();
2135 
2136   if (!GEP->isInBounds() ||
2137       NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
2138     return false;
2139 
2140   // FIXME: Support vector-GEPs.
2141   assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2142 
2143   // If the base pointer is non-null, we cannot walk to a null address with an
2144   // inbounds GEP in address space zero.
2145   if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
2146     return true;
2147 
2148   // Walk the GEP operands and see if any operand introduces a non-zero offset.
2149   // If so, then the GEP cannot produce a null pointer, as doing so would
2150   // inherently violate the inbounds contract within address space zero.
2151   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2152        GTI != GTE; ++GTI) {
2153     // Struct types are easy -- they must always be indexed by a constant.
2154     if (StructType *STy = GTI.getStructTypeOrNull()) {
2155       ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2156       unsigned ElementIdx = OpC->getZExtValue();
2157       const StructLayout *SL = Q.DL.getStructLayout(STy);
2158       uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2159       if (ElementOffset > 0)
2160         return true;
2161       continue;
2162     }
2163 
2164     // If we have a zero-sized type, the index doesn't matter. Keep looping.
2165     if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero())
2166       continue;
2167 
2168     // Fast path the constant operand case both for efficiency and so we don't
2169     // increment Depth when just zipping down an all-constant GEP.
2170     if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2171       if (!OpC->isZero())
2172         return true;
2173       continue;
2174     }
2175 
2176     // We post-increment Depth here because while isKnownNonZero increments it
2177     // as well, when we pop back up that increment won't persist. We don't want
2178     // to recurse 10k times just because we have 10k GEP operands. We don't
2179     // bail out completely because we want to handle constant GEPs regardless
2180     // of depth.
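    // (Editorial note, assuming the usual MaxAnalysisRecursionDepth of 6: a
    // GEP with a dozen variable indices only issues the recursive query below
    // for the first few operands; later iterations skip it but still catch
    // non-zero constant indices via the fast path above.)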
2181     if (Depth++ >= MaxAnalysisRecursionDepth)
2182       continue;
2183 
2184     if (isKnownNonZero(GTI.getOperand(), Depth, Q))
2185       return true;
2186   }
2187 
2188   return false;
2189 }
2190 
2191 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2192                                                   const Instruction *CtxI,
2193                                                   const DominatorTree *DT) {
2194   assert(!isa<Constant>(V) && "Called for constant?");
2195 
2196   if (!CtxI || !DT)
2197     return false;
2198 
2199   unsigned NumUsesExplored = 0;
2200   for (const auto *U : V->users()) {
2201     // Avoid massive lists.
2202     if (NumUsesExplored >= DomConditionsMaxUses)
2203       break;
2204     NumUsesExplored++;
2205 
2206     // If the value is used as an argument to a call or invoke, then argument
2207     // attributes may provide an answer about null-ness.
2208     if (const auto *CB = dyn_cast<CallBase>(U))
2209       if (auto *CalledFunc = CB->getCalledFunction())
2210         for (const Argument &Arg : CalledFunc->args())
2211           if (CB->getArgOperand(Arg.getArgNo()) == V &&
2212               Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2213               DT->dominates(CB, CtxI))
2214             return true;
2215 
2216     // If the value is used as a load/store, then the pointer must be non-null.
2217     if (V == getLoadStorePointerOperand(U)) {
2218       const Instruction *I = cast<Instruction>(U);
2219       if (!NullPointerIsDefined(I->getFunction(),
2220                                 V->getType()->getPointerAddressSpace()) &&
2221           DT->dominates(I, CtxI))
2222         return true;
2223     }
2224 
2225     if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
2226          match(U, m_IRem(m_Value(), m_Specific(V)))) &&
2227         isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
2228       return true;
2229 
2230     // Consider only compare instructions uniquely controlling a branch.
2231     Value *RHS;
2232     CmpInst::Predicate Pred;
2233     if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2234       continue;
2235 
2236     bool NonNullIfTrue;
2237     if (cmpExcludesZero(Pred, RHS))
2238       NonNullIfTrue = true;
2239     else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2240       NonNullIfTrue = false;
2241     else
2242       continue;
2243 
2244     SmallVector<const User *, 4> WorkList;
2245     SmallPtrSet<const User *, 4> Visited;
2246     for (const auto *CmpU : U->users()) {
2247       assert(WorkList.empty() && "Should be!");
2248       if (Visited.insert(CmpU).second)
2249         WorkList.push_back(CmpU);
2250 
2251       while (!WorkList.empty()) {
2252         auto *Curr = WorkList.pop_back_val();
2253 
2254         // If a user is an AND, add all its users to the work list. We only
2255         // propagate the "pred != null" condition through AND because it is only
2256         // correct to assume that all conditions of an AND are met in its true branch.
2257         // TODO: Support similar logic for OR and the EQ predicate?
2258         if (NonNullIfTrue)
2259           if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2260             for (const auto *CurrU : Curr->users())
2261               if (Visited.insert(CurrU).second)
2262                 WorkList.push_back(CurrU);
2263             continue;
2264           }
2265 
2266         if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2267           assert(BI->isConditional() && "uses a comparison!");
2268 
2269           BasicBlock *NonNullSuccessor =
2270               BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2271           BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2272           if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2273             return true;
2274         } else if (NonNullIfTrue && isGuard(Curr) &&
2275                    DT->dominates(cast<Instruction>(Curr), CtxI)) {
2276           return true;
2277         }
2278       }
2279     }
2280   }
2281 
2282   return false;
2283 }
2284 
2285 /// Does the 'Range' metadata (which must be a valid MD_range operand list)
2286 /// ensure that the value it's attached to is never equal to Value? 'RangeType' is
2287 /// the type of the value described by the range.
2288 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2289   const unsigned NumRanges = Ranges->getNumOperands() / 2;
2290   assert(NumRanges >= 1);
2291   for (unsigned i = 0; i < NumRanges; ++i) {
2292     ConstantInt *Lower =
2293         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2294     ConstantInt *Upper =
2295         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2296     ConstantRange Range(Lower->getValue(), Upper->getValue());
2297     if (Range.contains(Value))
2298       return false;
2299   }
2300   return true;
2301 }
2302 
2303 /// Try to detect a recurrence that monotonically increases/decreases from a
2304 /// non-zero starting value. These are common as induction variables.
2305 static bool isNonZeroRecurrence(const PHINode *PN) {
2306   BinaryOperator *BO = nullptr;
2307   Value *Start = nullptr, *Step = nullptr;
2308   const APInt *StartC, *StepC;
2309   if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2310       !match(Start, m_APInt(StartC)) || StartC->isZero())
2311     return false;
2312 
2313   switch (BO->getOpcode()) {
2314   case Instruction::Add:
2315     // Starting from non-zero and stepping away from zero can never wrap back
2316     // to zero.
2317     return BO->hasNoUnsignedWrap() ||
2318            (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2319             StartC->isNegative() == StepC->isNegative());
2320   case Instruction::Mul:
2321     return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2322            match(Step, m_APInt(StepC)) && !StepC->isZero();
2323   case Instruction::Shl:
2324     return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2325   case Instruction::AShr:
2326   case Instruction::LShr:
2327     return BO->isExact();
2328   default:
2329     return false;
2330   }
2331 }
2332 
2333 static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2334                          const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2335                          Value *Y, bool NSW) {
2336   KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2337   KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2338 
2339   // If X and Y are both non-negative (as signed values) then their sum is not
2340   // zero unless both X and Y are zero.
2341   if (XKnown.isNonNegative() && YKnown.isNonNegative())
2342     if (isKnownNonZero(Y, DemandedElts, Depth, Q) ||
2343         isKnownNonZero(X, DemandedElts, Depth, Q))
2344       return true;
2345 
2346   // If X and Y are both negative (as signed values) then their sum is not
2347   // zero unless both X and Y equal INT_MIN.
2348   if (XKnown.isNegative() && YKnown.isNegative()) {
2349     APInt Mask = APInt::getSignedMaxValue(BitWidth);
2350     // The sign bit of X is set. If some other bit is set then X is not equal
2351     // to INT_MIN.
2352     if (XKnown.One.intersects(Mask))
2353       return true;
2354     // The sign bit of Y is set. If some other bit is set then Y is not equal
2355     // to INT_MIN.
2356     if (YKnown.One.intersects(Mask))
2357       return true;
2358   }
2359 
2360   // The sum of a non-negative number and a power of two is not zero.
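  // (Editorial sketch of why: if X >= 0 and Y == 2^k, then X + Y == 0 mod 2^n
  // would require X == 2^n - 2^k, which is at least 2^(n-1) for every k < n
  // and therefore has its sign bit set, contradicting X being non-negative.)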
2361   if (XKnown.isNonNegative() &&
2362       isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2363     return true;
2364   if (YKnown.isNonNegative() &&
2365       isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2366     return true;
2367 
2368   return KnownBits::computeForAddSub(/*Add*/ true, NSW, XKnown, YKnown)
2369       .isNonZero();
2370 }
2371 
2372 static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2373                          const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2374                          Value *Y) {
2375   // TODO: Move this case into isKnownNonEqual().
2376   if (auto *C = dyn_cast<Constant>(X))
2377     if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q))
2378       return true;
2379 
2380   return ::isKnownNonEqual(X, Y, Depth, Q);
2381 }
2382 
2383 static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2384                            unsigned Depth, const SimplifyQuery &Q,
2385                            const KnownBits &KnownVal) {
2386   auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2387     switch (I->getOpcode()) {
2388     case Instruction::Shl:
2389       return Lhs.shl(Rhs);
2390     case Instruction::LShr:
2391       return Lhs.lshr(Rhs);
2392     case Instruction::AShr:
2393       return Lhs.ashr(Rhs);
2394     default:
2395       llvm_unreachable("Unknown Shift Opcode");
2396     }
2397   };
2398 
2399   auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2400     switch (I->getOpcode()) {
2401     case Instruction::Shl:
2402       return Lhs.lshr(Rhs);
2403     case Instruction::LShr:
2404     case Instruction::AShr:
2405       return Lhs.shl(Rhs);
2406     default:
2407       llvm_unreachable("Unknown Shift Opcode");
2408     }
2409   };
2410 
2411   if (KnownVal.isUnknown())
2412     return false;
2413 
2414   KnownBits KnownCnt =
2415       computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2416   APInt MaxShift = KnownCnt.getMaxValue();
2417   unsigned NumBits = KnownVal.getBitWidth();
2418   if (MaxShift.uge(NumBits))
2419     return false;
2420 
2421   if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2422     return true;
2423 
2424   // If all of the bits shifted out are known to be zero, and Val is known
2425   // non-zero then at least one non-zero bit must remain.
2426   if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2427           .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2428       isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q))
2429     return true;
2430 
2431   return false;
2432 }
2433 
2434 static bool isKnownNonZeroFromOperator(const Operator *I,
2435                                        const APInt &DemandedElts,
2436                                        unsigned Depth, const SimplifyQuery &Q) {
2437   unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2438   switch (I->getOpcode()) {
2439   case Instruction::Alloca:
2440     // Alloca never returns null, malloc might.
2441     return I->getType()->getPointerAddressSpace() == 0;
2442   case Instruction::GetElementPtr:
2443     if (I->getType()->isPointerTy())
2444       return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2445     break;
2446   case Instruction::BitCast: {
2447     // We need to be a bit careful here. We can only peek through the bitcast
2448     // if the scalar size of elements in the operand is smaller than and a
2449     // divisor of the size they are casting to. Take three cases:
2450     //
2451     // 1) Unsafe:
2452     //    bitcast <2 x i16> %NonZero to <4 x i8>
2453     //
2454     //    %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2455     //    <4 x i8> requires that all 4 i8 elements be non-zero, which isn't
2456     //    guaranteed (imagine just the sign bit set in the 2 i16 elements).
2457     //
2458     // 2) Unsafe:
2459     //    bitcast <4 x i3> %NonZero to <3 x i4>
2460     //
2461     //    Even though the scalar size of the src (`i3`) is smaller than the
2462     //    scalar size of the dst (`i4`), because the size of `i4` is not a
2463     //    multiple of the size of `i3`, it's possible for the `3 x i4`
2464     //    elements to be zero: some elements in the destination don't contain
2465     //    any full src element.
2466     //
2467     // 3) Safe:
2468     //    bitcast <4 x i8> %NonZero to <2 x i16>
2469     //
2470     //    This is always safe, as non-zero in the 4 i8 elements implies
2471     //    non-zero in the combination of any two adjacent ones. Since i16 is a
2472     //    multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
2473     //    This all implies the 2 i16 elements are non-zero.
2474     Type *FromTy = I->getOperand(0)->getType();
2475     if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2476         (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2477       return isKnownNonZero(I->getOperand(0), Depth, Q);
2478   } break;
2479   case Instruction::IntToPtr:
2480     // Note that we have to take special care to avoid looking through
2481     // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2482     // as casts that can alter the value, e.g., AddrSpaceCasts.
2483     if (!isa<ScalableVectorType>(I->getType()) &&
2484         Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2485             Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2486       return isKnownNonZero(I->getOperand(0), Depth, Q);
2487     break;
2488   case Instruction::PtrToInt:
2489     // Similar to int2ptr above, we can look through ptr2int here if the cast
2490     // is a no-op or an extend and not a truncate.
2491     if (!isa<ScalableVectorType>(I->getType()) &&
2492         Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2493             Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2494       return isKnownNonZero(I->getOperand(0), Depth, Q);
2495     break;
2496   case Instruction::Sub:
2497     return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2498                         I->getOperand(1));
2499   case Instruction::Or:
2500     // X | Y != 0 if X != 0 or Y != 0.
2501     return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
2502            isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2503   case Instruction::SExt:
2504   case Instruction::ZExt:
2505     // ext X != 0 if X != 0.
2506     return isKnownNonZero(I->getOperand(0), Depth, Q);
2507 
2508   case Instruction::Shl: {
2509     // shl nsw/nuw can't remove any non-zero bits.
2510     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2511     if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
2512       return isKnownNonZero(I->getOperand(0), Depth, Q);
2513 
2514     // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2515     // if the lowest bit is shifted off the end.
2516     KnownBits Known(BitWidth);
2517     computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
2518     if (Known.One[0])
2519       return true;
2520 
2521     return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2522   }
2523   case Instruction::LShr:
2524   case Instruction::AShr: {
2525     // shr exact can only shift out zero bits.
2526     const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
2527     if (BO->isExact())
2528       return isKnownNonZero(I->getOperand(0), Depth, Q);
2529 
2530     // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2531     // defined if the sign bit is shifted off the end.
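    // (Editorial sketch of why: a negative X has its sign bit set; for any
    // shift amount Y < BitWidth, lshr moves that bit to position
    // BitWidth - 1 - Y and ashr replicates it, so a set bit always remains.)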
2532     KnownBits Known =
2533         computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2534     if (Known.isNegative())
2535       return true;
2536 
2537     return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2538   }
2539   case Instruction::UDiv:
2540   case Instruction::SDiv: {
2541     // X / Y
2542     // div exact can only produce a zero if the dividend is zero.
2543     if (cast<PossiblyExactOperator>(I)->isExact())
2544       return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2545 
2546     std::optional<bool> XUgeY;
2547     KnownBits XKnown =
2548         computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2549     // If X is fully unknown we won't be able to figure anything out, so don't
2550     // bother computing known bits for Y.
2551     if (XKnown.isUnknown())
2552       return false;
2553 
2554     KnownBits YKnown =
2555         computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2556     if (I->getOpcode() == Instruction::SDiv) {
2557       // For signed division, we need to compare the abs values of the operands.
2558       XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2559       YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2560     }
2561     // If X u>= Y then the div is non-zero (0/0 is UB).
2562     XUgeY = KnownBits::uge(XKnown, YKnown);
2563     // If X is totally unknown or X u< Y, we won't be able to prove non-zero
2564     // with computeKnownBits, so just return early.
2565     return XUgeY && *XUgeY;
2566   }
2567   case Instruction::Add: {
2568     // X + Y.
2569 
2570     // If the Add has the nuw flag, then if either X or Y is non-zero the
2571     // result is non-zero.
2572     auto *BO = cast<OverflowingBinaryOperator>(I);
2573     if (Q.IIQ.hasNoUnsignedWrap(BO))
2574       return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
2575              isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2576 
2577     return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2578                         I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO));
2579   }
2580   case Instruction::Mul: {
2581     // If X and Y are non-zero then so is X * Y as long as the multiplication
2582     // does not overflow.
2583     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2584     if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO))
2585       return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) &&
2586              isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
2587 
2588     // If either X or Y is odd, then if the other is non-zero the result can't
2589     // be zero.
2590     KnownBits XKnown =
2591         computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2592     if (XKnown.One[0])
2593       return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
2594 
2595     KnownBits YKnown =
2596         computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2597     if (YKnown.One[0])
2598       return XKnown.isNonZero() ||
2599              isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
2600 
2601     // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
2602     // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2603     // the lowest known One of X and Y. If they are non-zero, the result
2604     // must be non-zero. We can check whether LSB(X) * LSB(Y) != 0 by checking
2605     // X.CountMaxTrailingZeros + Y.CountMaxTrailingZeros < BitWidth.
2606     return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2607            BitWidth;
2608   }
2609   case Instruction::Select: {
2610     // (C ? X : Y) != 0 if X != 0 and Y != 0.
2611 
2612     // First check if the arm is non-zero using `isKnownNonZero`. If that
2613     // fails, then see if the select condition implies the arm is non-zero.
2614     // For example (X != 0 ? X : Y), we know the true arm is non-zero as the
2615     // `X` "return" is dominated by `X != 0`.
2616     auto SelectArmIsNonZero = [&](bool IsTrueArm) {
2617       Value *Op;
2618       Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
2619       // Op is trivially non-zero.
2620       if (isKnownNonZero(Op, DemandedElts, Depth, Q))
2621         return true;
2622 
2623       // The condition of the select dominates the true/false arm. Check if the
2624       // condition implies that a given arm is non-zero.
2625       Value *X;
2626       CmpInst::Predicate Pred;
2627       if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
2628         return false;
2629 
2630       if (!IsTrueArm)
2631         Pred = ICmpInst::getInversePredicate(Pred);
2632 
2633       return cmpExcludesZero(Pred, X);
2634     };
2635 
2636     if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
2637         SelectArmIsNonZero(/* IsTrueArm */ false))
2638       return true;
2639     break;
2640   }
2641   case Instruction::PHI: {
2642     auto *PN = cast<PHINode>(I);
2643     if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
2644       return true;
2645 
2646     // Check if all incoming values are non-zero using recursion.
2647     SimplifyQuery RecQ = Q;
2648     unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2649     return llvm::all_of(PN->operands(), [&](const Use &U) {
2650       if (U.get() == PN)
2651         return true;
2652       RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2653       // Check if the branch on the phi excludes zero.
2654       ICmpInst::Predicate Pred;
2655       Value *X;
2656       BasicBlock *TrueSucc, *FalseSucc;
2657       if (match(RecQ.CxtI,
2658                 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
2659                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
2660         // Check for cases of duplicate successors.
2661         if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
2662           // If we're using the false successor, invert the predicate.
2663           if (FalseSucc == PN->getParent())
2664             Pred = CmpInst::getInversePredicate(Pred);
2665           if (cmpExcludesZero(Pred, X))
2666             return true;
2667         }
2668       }
2669       // Finally recurse on the edge and check it directly.
2670       return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
2671     });
2672   }
2673   case Instruction::ExtractElement:
2674     if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
2675       const Value *Vec = EEI->getVectorOperand();
2676       const Value *Idx = EEI->getIndexOperand();
2677       auto *CIdx = dyn_cast<ConstantInt>(Idx);
2678       if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
2679         unsigned NumElts = VecTy->getNumElements();
2680         APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2681         if (CIdx && CIdx->getValue().ult(NumElts))
2682           DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2683         return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
2684       }
2685     }
2686     break;
2687   case Instruction::Freeze:
2688     return isKnownNonZero(I->getOperand(0), Depth, Q) &&
2689            isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2690                                      Depth);
2691   case Instruction::Load: {
2692     auto *LI = cast<LoadInst>(I);
2693     // A load tagged with nonnull, or with dereferenceable metadata in an
2694     // address space where the null pointer is undefined, is never null.
2695     if (auto *PtrT = dyn_cast<PointerType>(I->getType()))
2696       if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
2697           (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
2698            !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
2699         return true;
2700 
2701     // No need to fall through to computeKnownBits, as range metadata is
2702     // already handled in isKnownNonZero.
2703     return false;
2704   }
2705   case Instruction::Call:
2706   case Instruction::Invoke:
2707     if (I->getType()->isPointerTy()) {
2708       const auto *Call = cast<CallBase>(I);
2709       if (Call->isReturnNonNull())
2710         return true;
2711       if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
2712         return isKnownNonZero(RP, Depth, Q);
2713     } else if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
2714       if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q))
2715         return true;
2716     }
2717 
2718     if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2719       switch (II->getIntrinsicID()) {
2720       case Intrinsic::sshl_sat:
2721       case Intrinsic::ushl_sat:
2722       case Intrinsic::abs:
2723       case Intrinsic::bitreverse:
2724       case Intrinsic::bswap:
2725       case Intrinsic::ctpop:
2726         return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
2727       case Intrinsic::ssub_sat:
2728         return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
2729                             II->getArgOperand(0), II->getArgOperand(1));
2730       case Intrinsic::sadd_sat:
2731         return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
2732                             II->getArgOperand(0), II->getArgOperand(1),
2733                             /*NSW*/ true);
2734       case Intrinsic::umax:
2735       case Intrinsic::uadd_sat:
2736         return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
2737                isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
2738       case Intrinsic::smin:
2739       case Intrinsic::smax: {
2740         auto KnownOpImpliesNonZero = [&](const KnownBits &K) {
2741           return II->getIntrinsicID() == Intrinsic::smin
2742                      ? K.isNegative()
2743                      : K.isStrictlyPositive();
2744         };
2745         KnownBits XKnown =
2746             computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
2747         if (KnownOpImpliesNonZero(XKnown))
2748           return true;
2749         KnownBits YKnown =
2750             computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
2751         if (KnownOpImpliesNonZero(YKnown))
2752           return true;
2753 
2754         if (XKnown.isNonZero() && YKnown.isNonZero())
2755           return true;
2756       }
2757         [[fallthrough]];
2758       case Intrinsic::umin:
2759         return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) &&
2760                isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q);
2761       case Intrinsic::cttz:
2762         return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
2763             .Zero[0];
2764       case Intrinsic::ctlz:
2765         return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
2766             .isNonNegative();
2767       case Intrinsic::fshr:
2768       case Intrinsic::fshl:
2769         // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
2770         if (II->getArgOperand(0) == II->getArgOperand(1))
2771           return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
2772         break;
2773       case Intrinsic::vscale:
2774         return true;
2775       default:
2776         break;
2777       }
2778       break;
2779     }
2780 
2781     return false;
2782   }
2783 
2784   KnownBits Known(BitWidth);
2785   computeKnownBits(I, DemandedElts, Known, Depth, Q);
2786   return Known.One != 0;
2787 }
2788 
2789 /// Return true if the given value is known to be non-zero when defined. For
2790 /// vectors, return true if every demanded element is known to be non-zero when
2791 /// defined. For pointers, if the context instruction and dominator tree are
2792 /// specified, perform context-sensitive analysis and return true if the
2793 /// pointer couldn't possibly be null at the specified instruction.
2794 /// Supports values with integer or pointer type and vectors of integers.
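/// Example of the context-sensitive pointer case (editorial illustration):
///   %c = icmp ne ptr %p, null
///   br i1 %c, label %taken, label %other
/// taken:
///   ; with Q.CxtI inside %taken and a dominator tree available, this can
///   ; return true for %p via isKnownNonNullFromDominatingCondition.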
2795 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
2796                     const SimplifyQuery &Q) {
2797 
2798 #ifndef NDEBUG
2799   Type *Ty = V->getType();
2800   assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2801 
2802   if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2803     assert(
2804         FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2805         "DemandedElt width should equal the fixed vector number of elements");
2806   } else {
2807     assert(DemandedElts == APInt(1, 1) &&
2808            "DemandedElt width should be 1 for scalars");
2809   }
2810 #endif
2811 
2812   if (auto *C = dyn_cast<Constant>(V)) {
2813     if (C->isNullValue())
2814       return false;
2815     if (isa<ConstantInt>(C))
2816       // Must be non-zero due to null test above.
2817       return true;
2818 
2819     // For constant vectors, check that all elements are undefined or known
2820     // non-zero to determine that the whole vector is known non-zero.
2821     if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) {
2822       for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
2823         if (!DemandedElts[i])
2824           continue;
2825         Constant *Elt = C->getAggregateElement(i);
2826         if (!Elt || Elt->isNullValue())
2827           return false;
2828         if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
2829           return false;
2830       }
2831       return true;
2832     }
2833 
2834     // A global variable in address space 0 is non-null unless extern weak
2835     // or an absolute symbol reference. Other address spaces may have null as a
2836     // valid address for a global, so we can't assume anything.
2837     if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2838       if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
2839           GV->getType()->getAddressSpace() == 0)
2840         return true;
2841     }
2842 
2843     // For constant expressions, fall through to the Operator code below.
2844     if (!isa<ConstantExpr>(V))
2845       return false;
2846   }
2847 
2848   if (auto *I = dyn_cast<Instruction>(V)) {
2849     if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
2850       // If the possible ranges don't contain zero, then the value is
2851       // definitely non-zero.
2852       if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
2853         const APInt ZeroValue(Ty->getBitWidth(), 0);
2854         if (rangeMetadataExcludesValue(Ranges, ZeroValue))
2855           return true;
2856       }
2857     }
2858   }
2859 
2860   if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
2861     return true;
2862 
2863   // Some of the tests below are recursive, so bail out if we hit the limit.
2864   if (Depth++ >= MaxAnalysisRecursionDepth)
2865     return false;
2866 
2867   // Check for pointer simplifications.
2868 
2869   if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
2870     // A byval or inalloca argument cannot be null unless the null pointer is
2871     // a defined address. A nonnull argument is assumed to never be 0.
2872     if (const Argument *A = dyn_cast<Argument>(V)) {
2873       if (((A->hasPassPointeeByValueCopyAttr() &&
2874             !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
2875            A->hasNonNullAttr()))
2876         return true;
2877     }
2878   }
2879 
2880   if (const auto *I = dyn_cast<Operator>(V))
2881     if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
2882       return true;
2883 
2884   if (!isa<Constant>(V) &&
2885       isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
2886     return true;
2887 
2888   return false;
2889 }
2890 
2891 bool isKnownNonZero(const Value *V, unsigned Depth, const SimplifyQuery &Q) {
2892   auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
2893   APInt DemandedElts =
2894       FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
2895   return isKnownNonZero(V, DemandedElts, Depth, Q);
2896 }
2897 
2898 /// If the pair of operators are the same invertible function, return the
2899 /// operands of the function corresponding to each input. Otherwise,
2900 /// return std::nullopt. An invertible function is one that is 1-to-1 and maps
2901 /// every input value to exactly one output value. This is equivalent to
2902 /// saying that Op1 and Op2 are equal exactly when the specified pair of
2903 /// operands are equal (except that Op1 and Op2 may be poison more often).
2904 static std::optional<std::pair<Value*, Value*>>
2905 getInvertibleOperands(const Operator *Op1,
2906                       const Operator *Op2) {
2907   if (Op1->getOpcode() != Op2->getOpcode())
2908     return std::nullopt;
2909 
2910   auto getOperands = [&](unsigned OpNum) -> auto {
2911     return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
2912   };
2913 
2914   switch (Op1->getOpcode()) {
2915   default:
2916     break;
2917   case Instruction::Add:
2918   case Instruction::Sub:
2919     if (Op1->getOperand(0) == Op2->getOperand(0))
2920       return getOperands(1);
2921     if (Op1->getOperand(1) == Op2->getOperand(1))
2922       return getOperands(0);
2923     break;
2924   case Instruction::Mul: {
2925     // invertible if A * B == (A * B) mod 2^N where A and B are integers
2926     // and N is the bitwidth. The nsw case is non-obvious, but proven by
2927     // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
2928     auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2929     auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2930     if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2931         (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2932       break;
2933 
2934     // Assume operand order has been canonicalized.
2935     if (Op1->getOperand(1) == Op2->getOperand(1) &&
2936         isa<ConstantInt>(Op1->getOperand(1)) &&
2937         !cast<ConstantInt>(Op1->getOperand(1))->isZero())
2938       return getOperands(0);
2939     break;
2940   }
2941   case Instruction::Shl: {
2942     // Same as multiplies, with the difference that we don't need to check
2943     // for a non-zero multiply. Shifts always multiply by non-zero.
2944     auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2945     auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2946     if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2947         (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2948       break;
2949 
2950     if (Op1->getOperand(1) == Op2->getOperand(1))
2951       return getOperands(0);
2952     break;
2953   }
2954   case Instruction::AShr:
2955   case Instruction::LShr: {
2956     auto *PEO1 = cast<PossiblyExactOperator>(Op1);
2957     auto *PEO2 = cast<PossiblyExactOperator>(Op2);
2958     if (!PEO1->isExact() || !PEO2->isExact())
2959       break;
2960 
2961     if (Op1->getOperand(1) == Op2->getOperand(1))
2962       return getOperands(0);
2963     break;
2964   }
2965   case Instruction::SExt:
2966   case Instruction::ZExt:
2967     if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
2968       return getOperands(0);
2969     break;
2970   case Instruction::PHI: {
2971     const PHINode *PN1 = cast<PHINode>(Op1);
2972     const PHINode *PN2 = cast<PHINode>(Op2);
2973 
2974     // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
2975     // are a single invertible function of the start values? Note that repeated
    // Note that repeated application of an invertible function is also
    // invertible.
    BinaryOperator *BO1 = nullptr;
    Value *Start1 = nullptr, *Step1 = nullptr;
    BinaryOperator *BO2 = nullptr;
    Value *Start2 = nullptr, *Step2 = nullptr;
    if (PN1->getParent() != PN2->getParent() ||
        !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
        !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
      break;

    auto Values = getInvertibleOperands(cast<Operator>(BO1),
                                        cast<Operator>(BO2));
    if (!Values)
      break;

    // We have to be careful of mutually defined recurrences here. Ex:
    // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
    // * X_i = Y_i = X_(i-1) OP Y_(i-1)
    // The invertibility of these is complicated, and not worth reasoning
    // about (yet?).
    if (Values->first != PN1 || Values->second != PN2)
      break;

    return std::make_pair(Start1, Start2);
  }
  }
  return std::nullopt;
}

/// Return true if V2 == V1 + X, where X is known non-zero.
static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
                           const SimplifyQuery &Q) {
  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
  if (!BO || BO->getOpcode() != Instruction::Add)
    return false;
  Value *Op = nullptr;
  if (V2 == BO->getOperand(0))
    Op = BO->getOperand(1);
  else if (V2 == BO->getOperand(1))
    Op = BO->getOperand(0);
  else
    return false;
  return isKnownNonZero(Op, Depth + 1, Q);
}

/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
/// the multiplication is nuw or nsw.
static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
                          const SimplifyQuery &Q) {
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
    const APInt *C;
    return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
           (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
           !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q);
  }
  return false;
}

/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
/// the shift is nuw or nsw.
static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
                          const SimplifyQuery &Q) {
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
    const APInt *C;
    return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
           (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
           !C->isZero() && isKnownNonZero(V1, Depth + 1, Q);
  }
  return false;
}

static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
                           unsigned Depth, const SimplifyQuery &Q) {
  // Check that the two PHIs are in the same block.
  if (PN1->getParent() != PN2->getParent())
    return false;

  SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
  bool UsedFullRecursion = false;
  for (const BasicBlock *IncomBB : PN1->blocks()) {
    if (!VisitedBBs.insert(IncomBB).second)
      continue; // Don't reprocess blocks that we have dealt with already.
    const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
    const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
    const APInt *C1, *C2;
    if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
      continue;

    // Only one pair of phi operands is allowed for full recursion.
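    // e.g. (illustrative) for
    //   %p = phi i8 [ 1, %a ], [ %x, %b ]
    //   %q = phi i8 [ 2, %a ], [ %y, %b ]
    // the constant pair (1, 2) is proven distinct for free, so the single
    // permitted recursive query is spent on the (%x, %y) pair.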
    if (UsedFullRecursion)
      return false;

    SimplifyQuery RecQ = Q;
    RecQ.CxtI = IncomBB->getTerminator();
    if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ))
      return false;
    UsedFullRecursion = true;
  }
  return true;
}

static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth,
                             const SimplifyQuery &Q) {
  const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
  if (!SI1)
    return false;

  if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
    const Value *Cond1 = SI1->getCondition();
    const Value *Cond2 = SI2->getCondition();
    if (Cond1 == Cond2)
      return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
                             Depth + 1, Q) &&
             isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
                             Depth + 1, Q);
  }
  return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) &&
         isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q);
}

// Check whether A is a recursive GEP that is also an incoming value of a PHI
// in a loop, and B is either a pointer or another GEP with the same base. If
// the PHI has 2 incoming values, one of them being the recursive GEP A and
// the other a pointer with the same base as B at the same or a higher offset,
// then A and B cannot be equal: the loop only moves the pointer further away
// from B when the recursive GEP's offset is greater than 0 (and,
// symmetrically, further below B when the offset is negative).
static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
                                               const SimplifyQuery &Q) {
  if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
    return false;

  auto *GEPA = dyn_cast<GEPOperator>(A);
  if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
    return false;

  // Handle 2 incoming PHI values with one being a recursive GEP.
  auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
  if (!PN || PN->getNumIncomingValues() != 2)
    return false;

  // Search for the recursive GEP as an incoming operand, and record that as
  // Step.
  Value *Start = nullptr;
  Value *Step = const_cast<Value *>(A);
  if (PN->getIncomingValue(0) == Step)
    Start = PN->getIncomingValue(1);
  else if (PN->getIncomingValue(1) == Step)
    Start = PN->getIncomingValue(0);
  else
    return false;

  // The other incoming node's base should match the B base.
  // StartOffset >= OffsetB && StepOffset > 0?
  // StartOffset <= OffsetB && StepOffset < 0?
  // Is non-equal if above are true.
  // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
  // optimization to inbounds GEPs only.
  unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
  APInt StartOffset(IndexWidth, 0);
  Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
  APInt StepOffset(IndexWidth, 0);
  Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);

  // Check if the base pointer of Step matches the PHI.
  if (Step != PN)
    return false;
  APInt OffsetB(IndexWidth, 0);
  B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
  return Start == B &&
         ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
          (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
}

/// Return true if it is known that V1 != V2.
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
                            const SimplifyQuery &Q) {
  if (V1 == V2)
    return false;
  if (V1->getType() != V2->getType())
    // We can't look through casts yet.
    return false;

  if (Depth >= MaxAnalysisRecursionDepth)
    return false;

  // See if we can recurse through (exactly one of) our operands. This
  // requires our operation be 1-to-1 and map every input value to exactly
  // one output value. Such an operation is invertible.
  auto *O1 = dyn_cast<Operator>(V1);
  auto *O2 = dyn_cast<Operator>(V2);
  if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
    if (auto Values = getInvertibleOperands(O1, O2))
      return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q);

    if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
      const PHINode *PN2 = cast<PHINode>(V2);
      // FIXME: This is missing a generalization to handle the case where one
      // is a PHI and another one isn't.
      if (isNonEqualPHIs(PN1, PN2, Depth, Q))
        return true;
    }
  }

  if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q))
    return true;

  if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q))
    return true;

  if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q))
    return true;

  if (V1->getType()->isIntOrIntVectorTy()) {
    // Are any known bits in V1 contradictory to known bits in V2? If V1
    // has a known zero where V2 has a known one, they must not be equal.
    KnownBits Known1 = computeKnownBits(V1, Depth, Q);
    if (!Known1.isUnknown()) {
      KnownBits Known2 = computeKnownBits(V2, Depth, Q);
      if (Known1.Zero.intersects(Known2.One) ||
          Known2.Zero.intersects(Known1.One))
        return true;
    }
  }

  if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q))
    return true;

  if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
      isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
    return true;

  Value *A, *B;
  // PtrToInts are NonEqual if their Ptrs are NonEqual.
  // Check PtrToInt type matches the pointer size.
  if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
      match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
    return isKnownNonEqual(A, B, Depth + 1, Q);

  return false;
}

// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
// Returns the input and lower/upper bounds.
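// As an illustrative sketch, a clamp of %x to [0, 255] such as
//   %c1 = icmp slt i32 %x, 255
//   %lo = select i1 %c1, i32 %x, i32 255        ; smin(%x, 255)
//   %c2 = icmp sgt i32 %lo, 0
//   %r  = select i1 %c2, i32 %lo, i32 0         ; smax(%lo, 0)
// is matched with In = %x, CLow = 0 and CHigh = 255.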
3218 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, 3219 const APInt *&CLow, const APInt *&CHigh) { 3220 assert(isa<Operator>(Select) && 3221 cast<Operator>(Select)->getOpcode() == Instruction::Select && 3222 "Input should be a Select!"); 3223 3224 const Value *LHS = nullptr, *RHS = nullptr; 3225 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; 3226 if (SPF != SPF_SMAX && SPF != SPF_SMIN) 3227 return false; 3228 3229 if (!match(RHS, m_APInt(CLow))) 3230 return false; 3231 3232 const Value *LHS2 = nullptr, *RHS2 = nullptr; 3233 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; 3234 if (getInverseMinMaxFlavor(SPF) != SPF2) 3235 return false; 3236 3237 if (!match(RHS2, m_APInt(CHigh))) 3238 return false; 3239 3240 if (SPF == SPF_SMIN) 3241 std::swap(CLow, CHigh); 3242 3243 In = LHS2; 3244 return CLow->sle(*CHigh); 3245 } 3246 3247 static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II, 3248 const APInt *&CLow, 3249 const APInt *&CHigh) { 3250 assert((II->getIntrinsicID() == Intrinsic::smin || 3251 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax"); 3252 3253 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID()); 3254 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0)); 3255 if (!InnerII || InnerII->getIntrinsicID() != InverseID || 3256 !match(II->getArgOperand(1), m_APInt(CLow)) || 3257 !match(InnerII->getArgOperand(1), m_APInt(CHigh))) 3258 return false; 3259 3260 if (II->getIntrinsicID() == Intrinsic::smin) 3261 std::swap(CLow, CHigh); 3262 return CLow->sle(*CHigh); 3263 } 3264 3265 /// For vector constants, loop over the elements and find the constant with the 3266 /// minimum number of sign bits. Return 0 if the value is not a vector constant 3267 /// or if any element was not analyzed; otherwise, return the count for the 3268 /// element with the minimum number of sign bits. 3269 static unsigned computeNumSignBitsVectorConstant(const Value *V, 3270 const APInt &DemandedElts, 3271 unsigned TyBits) { 3272 const auto *CV = dyn_cast<Constant>(V); 3273 if (!CV || !isa<FixedVectorType>(CV->getType())) 3274 return 0; 3275 3276 unsigned MinSignBits = TyBits; 3277 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements(); 3278 for (unsigned i = 0; i != NumElts; ++i) { 3279 if (!DemandedElts[i]) 3280 continue; 3281 // If we find a non-ConstantInt, bail out. 3282 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i)); 3283 if (!Elt) 3284 return 0; 3285 3286 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); 3287 } 3288 3289 return MinSignBits; 3290 } 3291 3292 static unsigned ComputeNumSignBitsImpl(const Value *V, 3293 const APInt &DemandedElts, 3294 unsigned Depth, const SimplifyQuery &Q); 3295 3296 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, 3297 unsigned Depth, const SimplifyQuery &Q) { 3298 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q); 3299 assert(Result > 0 && "At least one sign bit needs to be present!"); 3300 return Result; 3301 } 3302 3303 /// Return the number of times the sign bit of the register is replicated into 3304 /// the other bits. We know that at least 1 bit is always equal to the sign bit 3305 /// (itself), but other cases can give us information. For example, immediately 3306 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each 3307 /// other, so we return 3. 
For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits of the demanded
/// elements in the vector specified by DemandedElts.
static unsigned ComputeNumSignBitsImpl(const Value *V,
                                       const APInt &DemandedElts,
                                       unsigned Depth, const SimplifyQuery &Q) {
  Type *Ty = V->getType();
#ifndef NDEBUG
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars");
  }
#endif

  // We return the minimum number of sign bits that are guaranteed to be
  // present in V, so for undef we have to conservatively return 1. We don't
  // have the same behavior for poison though -- that's a FIXME today.

  Type *ScalarTy = Ty->getScalarType();
  unsigned TyBits = ScalarTy->isPointerTy() ?
    Q.DL.getPointerTypeSizeInBits(ScalarTy) :
    Q.DL.getTypeSizeInBits(ScalarTy);

  unsigned Tmp, Tmp2;
  unsigned FirstAnswer = 1;

  // Note that ConstantInt is handled by the general computeKnownBits case
  // below.

  if (Depth == MaxAnalysisRecursionDepth)
    return 1;

  if (auto *U = dyn_cast<Operator>(V)) {
    switch (Operator::getOpcode(V)) {
    default: break;
    case Instruction::SExt:
      Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
      return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;

    case Instruction::SDiv: {
      const APInt *Denominator;
      // sdiv X, C -> adds log(C) sign bits.
      if (match(U->getOperand(1), m_APInt(Denominator))) {

        // Ignore non-positive denominator.
        if (!Denominator->isStrictlyPositive())
          break;

        // Calculate the incoming numerator bits.
        unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);

        // Add floor(log(C)) bits to the numerator bits.
        return std::min(TyBits, NumBits + Denominator->logBase2());
      }
      break;
    }

    case Instruction::SRem: {
      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);

      const APInt *Denominator;
      // srem X, C -> we know that the result is within [-C+1,C) when C is a
      // positive constant. This lets us put a lower bound on the number of
      // sign bits.
      if (match(U->getOperand(1), m_APInt(Denominator))) {

        // Ignore non-positive denominator.
        if (Denominator->isStrictlyPositive()) {
          // Calculate the leading sign bit constraints by examining the
          // denominator. Given that the denominator is positive, there are
          // two cases:
          //
          //  1. The numerator is positive. The result range is [0,C) and
          //     [0,C) u< (1 << ceilLogBase2(C)).
          //
          //  2. The numerator is negative. Then the result range is (-C,0]
          //     and integers in (-C,0] are either 0 or >u (-1 <<
          //     ceilLogBase2(C)).
          //
          // Thus a lower bound on the number of sign bits is `TyBits -
          // ceilLogBase2(C)`.

          unsigned ResBits = TyBits - Denominator->ceilLogBase2();
          Tmp = std::max(Tmp, ResBits);
        }
      }
      return Tmp;
    }

    case Instruction::AShr: {
      Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
      // ashr X, C -> adds C sign bits. Vectors too.
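      // e.g. if %x is known to have 3 sign bits, then
      //   %r = ashr i8 %x, 2
      // has min(3 + 2, 8) = 5 known sign bits, since ashr shifts in copies
      // of the sign bit.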
3404 const APInt *ShAmt; 3405 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3406 if (ShAmt->uge(TyBits)) 3407 break; // Bad shift. 3408 unsigned ShAmtLimited = ShAmt->getZExtValue(); 3409 Tmp += ShAmtLimited; 3410 if (Tmp > TyBits) Tmp = TyBits; 3411 } 3412 return Tmp; 3413 } 3414 case Instruction::Shl: { 3415 const APInt *ShAmt; 3416 if (match(U->getOperand(1), m_APInt(ShAmt))) { 3417 // shl destroys sign bits. 3418 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3419 if (ShAmt->uge(TyBits) || // Bad shift. 3420 ShAmt->uge(Tmp)) break; // Shifted all sign bits out. 3421 Tmp2 = ShAmt->getZExtValue(); 3422 return Tmp - Tmp2; 3423 } 3424 break; 3425 } 3426 case Instruction::And: 3427 case Instruction::Or: 3428 case Instruction::Xor: // NOT is handled here. 3429 // Logical binary ops preserve the number of sign bits at the worst. 3430 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3431 if (Tmp != 1) { 3432 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3433 FirstAnswer = std::min(Tmp, Tmp2); 3434 // We computed what we know about the sign bits as our first 3435 // answer. Now proceed to the generic code that uses 3436 // computeKnownBits, and pick whichever answer is better. 3437 } 3438 break; 3439 3440 case Instruction::Select: { 3441 // If we have a clamp pattern, we know that the number of sign bits will 3442 // be the minimum of the clamp min/max range. 3443 const Value *X; 3444 const APInt *CLow, *CHigh; 3445 if (isSignedMinMaxClamp(U, X, CLow, CHigh)) 3446 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); 3447 3448 Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3449 if (Tmp == 1) break; 3450 Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); 3451 return std::min(Tmp, Tmp2); 3452 } 3453 3454 case Instruction::Add: 3455 // Add can have at most one carry bit. Thus we know that the output 3456 // is, at worst, one more bit than the inputs. 3457 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3458 if (Tmp == 1) break; 3459 3460 // Special case decrementing a value (ADD X, -1): 3461 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) 3462 if (CRHS->isAllOnesValue()) { 3463 KnownBits Known(TyBits); 3464 computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); 3465 3466 // If the input is known to be 0 or 1, the output is 0/-1, which is 3467 // all sign bits set. 3468 if ((Known.Zero | 1).isAllOnes()) 3469 return TyBits; 3470 3471 // If we are subtracting one from a positive number, there is no carry 3472 // out of the result. 3473 if (Known.isNonNegative()) 3474 return Tmp; 3475 } 3476 3477 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3478 if (Tmp2 == 1) break; 3479 return std::min(Tmp, Tmp2) - 1; 3480 3481 case Instruction::Sub: 3482 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3483 if (Tmp2 == 1) break; 3484 3485 // Handle NEG. 3486 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) 3487 if (CLHS->isNullValue()) { 3488 KnownBits Known(TyBits); 3489 computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); 3490 // If the input is known to be 0 or 1, the output is 0/-1, which is 3491 // all sign bits set. 3492 if ((Known.Zero | 1).isAllOnes()) 3493 return TyBits; 3494 3495 // If the input is known to be positive (the sign bit is known clear), 3496 // the output of the NEG has the same number of sign bits as the 3497 // input. 3498 if (Known.isNonNegative()) 3499 return Tmp2; 3500 3501 // Otherwise, we treat this like a SUB. 
3502 } 3503 3504 // Sub can have at most one carry bit. Thus we know that the output 3505 // is, at worst, one more bit than the inputs. 3506 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3507 if (Tmp == 1) break; 3508 return std::min(Tmp, Tmp2) - 1; 3509 3510 case Instruction::Mul: { 3511 // The output of the Mul can be at most twice the valid bits in the 3512 // inputs. 3513 unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3514 if (SignBitsOp0 == 1) break; 3515 unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); 3516 if (SignBitsOp1 == 1) break; 3517 unsigned OutValidBits = 3518 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); 3519 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; 3520 } 3521 3522 case Instruction::PHI: { 3523 const PHINode *PN = cast<PHINode>(U); 3524 unsigned NumIncomingValues = PN->getNumIncomingValues(); 3525 // Don't analyze large in-degree PHIs. 3526 if (NumIncomingValues > 4) break; 3527 // Unreachable blocks may have zero-operand PHI nodes. 3528 if (NumIncomingValues == 0) break; 3529 3530 // Take the minimum of all incoming values. This can't infinitely loop 3531 // because of our depth threshold. 3532 SimplifyQuery RecQ = Q; 3533 Tmp = TyBits; 3534 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) { 3535 if (Tmp == 1) return Tmp; 3536 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator(); 3537 Tmp = std::min( 3538 Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ)); 3539 } 3540 return Tmp; 3541 } 3542 3543 case Instruction::Trunc: { 3544 // If the input contained enough sign bits that some remain after the 3545 // truncation, then we can make use of that. Otherwise we don't know 3546 // anything. 3547 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3548 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits(); 3549 if (Tmp > (OperandTyBits - TyBits)) 3550 return Tmp - (OperandTyBits - TyBits); 3551 3552 return 1; 3553 } 3554 3555 case Instruction::ExtractElement: 3556 // Look through extract element. At the moment we keep this simple and 3557 // skip tracking the specific element. But at least we might find 3558 // information valid for all elements of the vector (for example if vector 3559 // is sign extended, shifted, etc). 3560 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); 3561 3562 case Instruction::ShuffleVector: { 3563 // Collect the minimum number of sign bits that are shared by every vector 3564 // element referenced by the shuffle. 3565 auto *Shuf = dyn_cast<ShuffleVectorInst>(U); 3566 if (!Shuf) { 3567 // FIXME: Add support for shufflevector constant expressions. 3568 return 1; 3569 } 3570 APInt DemandedLHS, DemandedRHS; 3571 // For undef elements, we don't know anything about the common state of 3572 // the shuffle result. 3573 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 3574 return 1; 3575 Tmp = std::numeric_limits<unsigned>::max(); 3576 if (!!DemandedLHS) { 3577 const Value *LHS = Shuf->getOperand(0); 3578 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q); 3579 } 3580 // If we don't know anything, early out and try computeKnownBits 3581 // fall-back. 3582 if (Tmp == 1) 3583 break; 3584 if (!!DemandedRHS) { 3585 const Value *RHS = Shuf->getOperand(1); 3586 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q); 3587 Tmp = std::min(Tmp, Tmp2); 3588 } 3589 // If we don't know anything, early out and try computeKnownBits 3590 // fall-back. 
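      // e.g. (illustrative) a shuffle of two "sext i8 to i32" operands keeps
      // at least 25 sign bits in every demanded lane, whichever input each
      // mask element selects.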
      if (Tmp == 1)
        break;
      assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
      return Tmp;
    }
    case Instruction::Call: {
      if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
        switch (II->getIntrinsicID()) {
        default: break;
        case Intrinsic::abs:
          Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
          if (Tmp == 1) break;

          // Absolute value reduces number of sign bits by at most 1.
          return Tmp - 1;
        case Intrinsic::smin:
        case Intrinsic::smax: {
          const APInt *CLow, *CHigh;
          if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
            return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
        }
        }
      }
    }
    }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.

  // If we can examine all elements of a vector constant successfully, we're
  // done (we can't do any better than that). If not, keep trying.
  if (unsigned VecSignBits =
          computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
    return VecSignBits;

  KnownBits Known(TyBits);
  computeKnownBits(V, DemandedElts, Known, Depth, Q);

  // If we know that the sign bit is either zero or one, determine the number
  // of identical bits in the top of the input value.
  return std::max(FirstAnswer, Known.countMinSignBits());
}

Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
                                            const TargetLibraryInfo *TLI) {
  const Function *F = CB.getCalledFunction();
  if (!F)
    return Intrinsic::not_intrinsic;

  if (F->isIntrinsic())
    return F->getIntrinsicID();

  // We are going to infer semantics of a library function based on mapping it
  // to an LLVM intrinsic. Check that the library function is available at
  // this call site and in this environment.
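  // e.g. a read-only call such as
  //   %r = call float @sinf(float %x)
  // is treated as llvm.sin.f32 by the switch below, but only when TLI
  // confirms that sinf here really is the C library function.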
3647 LibFunc Func; 3648 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) || 3649 !CB.onlyReadsMemory()) 3650 return Intrinsic::not_intrinsic; 3651 3652 switch (Func) { 3653 default: 3654 break; 3655 case LibFunc_sin: 3656 case LibFunc_sinf: 3657 case LibFunc_sinl: 3658 return Intrinsic::sin; 3659 case LibFunc_cos: 3660 case LibFunc_cosf: 3661 case LibFunc_cosl: 3662 return Intrinsic::cos; 3663 case LibFunc_exp: 3664 case LibFunc_expf: 3665 case LibFunc_expl: 3666 return Intrinsic::exp; 3667 case LibFunc_exp2: 3668 case LibFunc_exp2f: 3669 case LibFunc_exp2l: 3670 return Intrinsic::exp2; 3671 case LibFunc_log: 3672 case LibFunc_logf: 3673 case LibFunc_logl: 3674 return Intrinsic::log; 3675 case LibFunc_log10: 3676 case LibFunc_log10f: 3677 case LibFunc_log10l: 3678 return Intrinsic::log10; 3679 case LibFunc_log2: 3680 case LibFunc_log2f: 3681 case LibFunc_log2l: 3682 return Intrinsic::log2; 3683 case LibFunc_fabs: 3684 case LibFunc_fabsf: 3685 case LibFunc_fabsl: 3686 return Intrinsic::fabs; 3687 case LibFunc_fmin: 3688 case LibFunc_fminf: 3689 case LibFunc_fminl: 3690 return Intrinsic::minnum; 3691 case LibFunc_fmax: 3692 case LibFunc_fmaxf: 3693 case LibFunc_fmaxl: 3694 return Intrinsic::maxnum; 3695 case LibFunc_copysign: 3696 case LibFunc_copysignf: 3697 case LibFunc_copysignl: 3698 return Intrinsic::copysign; 3699 case LibFunc_floor: 3700 case LibFunc_floorf: 3701 case LibFunc_floorl: 3702 return Intrinsic::floor; 3703 case LibFunc_ceil: 3704 case LibFunc_ceilf: 3705 case LibFunc_ceill: 3706 return Intrinsic::ceil; 3707 case LibFunc_trunc: 3708 case LibFunc_truncf: 3709 case LibFunc_truncl: 3710 return Intrinsic::trunc; 3711 case LibFunc_rint: 3712 case LibFunc_rintf: 3713 case LibFunc_rintl: 3714 return Intrinsic::rint; 3715 case LibFunc_nearbyint: 3716 case LibFunc_nearbyintf: 3717 case LibFunc_nearbyintl: 3718 return Intrinsic::nearbyint; 3719 case LibFunc_round: 3720 case LibFunc_roundf: 3721 case LibFunc_roundl: 3722 return Intrinsic::round; 3723 case LibFunc_roundeven: 3724 case LibFunc_roundevenf: 3725 case LibFunc_roundevenl: 3726 return Intrinsic::roundeven; 3727 case LibFunc_pow: 3728 case LibFunc_powf: 3729 case LibFunc_powl: 3730 return Intrinsic::pow; 3731 case LibFunc_sqrt: 3732 case LibFunc_sqrtf: 3733 case LibFunc_sqrtl: 3734 return Intrinsic::sqrt; 3735 } 3736 3737 return Intrinsic::not_intrinsic; 3738 } 3739 3740 /// Deprecated, use computeKnownFPClass instead. 3741 /// 3742 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a 3743 /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign 3744 /// bit despite comparing equal. 3745 static bool cannotBeOrderedLessThanZeroImpl(const Value *V, 3746 const DataLayout &DL, 3747 const TargetLibraryInfo *TLI, 3748 bool SignBitOnly, unsigned Depth) { 3749 // TODO: This function does not do the right thing when SignBitOnly is true 3750 // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform 3751 // which flips the sign bits of NaNs. See 3752 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3753 3754 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { 3755 return !CFP->getValueAPF().isNegative() || 3756 (!SignBitOnly && CFP->getValueAPF().isZero()); 3757 } 3758 3759 // Handle vector of constants. 
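  // e.g. an illustrative sketch (with SignBitOnly == false):
  //   <2 x float> <float 0.0, float 2.5>    -> true
  //   <2 x float> <float -1.5, float 2.5>   -> false
  // An element that is not a ConstantFP (including undef) fails the cast in
  // the loop below and conservatively returns false.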
3760 if (auto *CV = dyn_cast<Constant>(V)) { 3761 if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) { 3762 unsigned NumElts = CVFVTy->getNumElements(); 3763 for (unsigned i = 0; i != NumElts; ++i) { 3764 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i)); 3765 if (!CFP) 3766 return false; 3767 if (CFP->getValueAPF().isNegative() && 3768 (SignBitOnly || !CFP->getValueAPF().isZero())) 3769 return false; 3770 } 3771 3772 // All non-negative ConstantFPs. 3773 return true; 3774 } 3775 } 3776 3777 if (Depth == MaxAnalysisRecursionDepth) 3778 return false; 3779 3780 const Operator *I = dyn_cast<Operator>(V); 3781 if (!I) 3782 return false; 3783 3784 switch (I->getOpcode()) { 3785 default: 3786 break; 3787 // Unsigned integers are always nonnegative. 3788 case Instruction::UIToFP: 3789 return true; 3790 case Instruction::FDiv: 3791 // X / X is always exactly 1.0 or a NaN. 3792 if (I->getOperand(0) == I->getOperand(1) && 3793 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3794 return true; 3795 3796 // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN). 3797 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3798 SignBitOnly, Depth + 1) && 3799 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3800 /*SignBitOnly*/ true, Depth + 1); 3801 case Instruction::FMul: 3802 // X * X is always non-negative or a NaN. 3803 if (I->getOperand(0) == I->getOperand(1) && 3804 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs())) 3805 return true; 3806 3807 [[fallthrough]]; 3808 case Instruction::FAdd: 3809 case Instruction::FRem: 3810 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3811 SignBitOnly, Depth + 1) && 3812 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3813 SignBitOnly, Depth + 1); 3814 case Instruction::Select: 3815 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3816 SignBitOnly, Depth + 1) && 3817 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3818 SignBitOnly, Depth + 1); 3819 case Instruction::FPExt: 3820 case Instruction::FPTrunc: 3821 // Widening/narrowing never change sign. 3822 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3823 SignBitOnly, Depth + 1); 3824 case Instruction::ExtractElement: 3825 // Look through extract element. At the moment we keep this simple and skip 3826 // tracking the specific element. But at least we might find information 3827 // valid for all elements of the vector. 3828 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3829 SignBitOnly, Depth + 1); 3830 case Instruction::Call: 3831 const auto *CI = cast<CallInst>(I); 3832 Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI); 3833 switch (IID) { 3834 default: 3835 break; 3836 case Intrinsic::canonicalize: 3837 case Intrinsic::arithmetic_fence: 3838 case Intrinsic::floor: 3839 case Intrinsic::ceil: 3840 case Intrinsic::trunc: 3841 case Intrinsic::rint: 3842 case Intrinsic::nearbyint: 3843 case Intrinsic::round: 3844 case Intrinsic::roundeven: 3845 case Intrinsic::fptrunc_round: 3846 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3847 SignBitOnly, Depth + 1); 3848 case Intrinsic::maxnum: { 3849 Value *V0 = I->getOperand(0), *V1 = I->getOperand(1); 3850 auto isPositiveNum = [&](Value *V) { 3851 if (SignBitOnly) { 3852 // With SignBitOnly, this is tricky because the result of 3853 // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is 3854 // a constant strictly greater than 0.0. 
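          // e.g. maxnum(x, 1.0) must have a cleared sign bit for any x (a
          // NaN x selects 1.0), while maxnum(x, +0.0) may not, since
          // maxnum(+0.0, -0.0) can legally return either zero.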
3855 const APFloat *C; 3856 return match(V, m_APFloat(C)) && 3857 *C > APFloat::getZero(C->getSemantics()); 3858 } 3859 3860 // -0.0 compares equal to 0.0, so if this operand is at least -0.0, 3861 // maxnum can't be ordered-less-than-zero. 3862 return isKnownNeverNaN(V, DL, TLI) && 3863 cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1); 3864 }; 3865 3866 // TODO: This could be improved. We could also check that neither operand 3867 // has its sign bit set (and at least 1 is not-NAN?). 3868 return isPositiveNum(V0) || isPositiveNum(V1); 3869 } 3870 3871 case Intrinsic::maximum: 3872 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3873 SignBitOnly, Depth + 1) || 3874 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3875 SignBitOnly, Depth + 1); 3876 case Intrinsic::minnum: 3877 case Intrinsic::minimum: 3878 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3879 SignBitOnly, Depth + 1) && 3880 cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, 3881 SignBitOnly, Depth + 1); 3882 case Intrinsic::exp: 3883 case Intrinsic::exp2: 3884 case Intrinsic::fabs: 3885 return true; 3886 case Intrinsic::copysign: 3887 // Only the sign operand matters. 3888 return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true, 3889 Depth + 1); 3890 case Intrinsic::sqrt: 3891 // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0. 3892 if (!SignBitOnly) 3893 return true; 3894 return CI->hasNoNaNs() && 3895 (CI->hasNoSignedZeros() || 3896 cannotBeNegativeZero(CI->getOperand(0), DL, TLI)); 3897 3898 case Intrinsic::powi: 3899 if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) { 3900 // powi(x,n) is non-negative if n is even. 3901 if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0) 3902 return true; 3903 } 3904 // TODO: This is not correct. Given that exp is an integer, here are the 3905 // ways that pow can return a negative value: 3906 // 3907 // pow(x, exp) --> negative if exp is odd and x is negative. 3908 // pow(-0, exp) --> -inf if exp is negative odd. 3909 // pow(-0, exp) --> -0 if exp is positive odd. 3910 // pow(-inf, exp) --> -0 if exp is negative odd. 3911 // pow(-inf, exp) --> -inf if exp is positive odd. 3912 // 3913 // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN, 3914 // but we must return false if x == -0. Unfortunately we do not currently 3915 // have a way of expressing this constraint. See details in 3916 // https://llvm.org/bugs/show_bug.cgi?id=31702. 3917 return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI, 3918 SignBitOnly, Depth + 1); 3919 3920 case Intrinsic::fma: 3921 case Intrinsic::fmuladd: 3922 // x*x+y is non-negative if y is non-negative. 3923 return I->getOperand(0) == I->getOperand(1) && 3924 (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) && 3925 cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI, 3926 SignBitOnly, Depth + 1); 3927 } 3928 break; 3929 } 3930 return false; 3931 } 3932 3933 bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL, 3934 const TargetLibraryInfo *TLI) { 3935 // FIXME: Use computeKnownFPClass and pass all arguments 3936 return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0); 3937 } 3938 3939 /// Return true if it's possible to assume IEEE treatment of input denormals in 3940 /// \p F for \p Val. 
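/// e.g. this holds when the enclosing function carries the default
/// "denormal-fp-math"="ieee,ieee" attribute (or no such attribute at all).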
3941 static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) { 3942 Ty = Ty->getScalarType(); 3943 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE; 3944 } 3945 3946 static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3947 Ty = Ty->getScalarType(); 3948 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3949 return Mode.Input == DenormalMode::IEEE || 3950 Mode.Input == DenormalMode::PositiveZero; 3951 } 3952 3953 static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) { 3954 Ty = Ty->getScalarType(); 3955 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics()); 3956 return Mode.Output == DenormalMode::IEEE || 3957 Mode.Output == DenormalMode::PositiveZero; 3958 } 3959 3960 bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const { 3961 return isKnownNeverZero() && 3962 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty)); 3963 } 3964 3965 bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F, 3966 Type *Ty) const { 3967 return isKnownNeverNegZero() && 3968 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty)); 3969 } 3970 3971 bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F, 3972 Type *Ty) const { 3973 if (!isKnownNeverPosZero()) 3974 return false; 3975 3976 // If we know there are no denormals, nothing can be flushed to zero. 3977 if (isKnownNeverSubnormal()) 3978 return true; 3979 3980 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 3981 switch (Mode.Input) { 3982 case DenormalMode::IEEE: 3983 return true; 3984 case DenormalMode::PreserveSign: 3985 // Negative subnormal won't flush to +0 3986 return isKnownNeverPosSubnormal(); 3987 case DenormalMode::PositiveZero: 3988 default: 3989 // Both positive and negative subnormal could flush to +0 3990 return false; 3991 } 3992 3993 llvm_unreachable("covered switch over denormal mode"); 3994 } 3995 3996 void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F, 3997 Type *Ty) { 3998 KnownFPClasses = Src.KnownFPClasses; 3999 // If we aren't assuming the source can't be a zero, we don't have to check if 4000 // a denormal input could be flushed. 4001 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero()) 4002 return; 4003 4004 // If we know the input can't be a denormal, it can't be flushed to 0. 4005 if (Src.isKnownNeverSubnormal()) 4006 return; 4007 4008 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics()); 4009 4010 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE()) 4011 KnownFPClasses |= fcPosZero; 4012 4013 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) { 4014 if (Mode != DenormalMode::getPositiveZero()) 4015 KnownFPClasses |= fcNegZero; 4016 4017 if (Mode.Input == DenormalMode::PositiveZero || 4018 Mode.Output == DenormalMode::PositiveZero || 4019 Mode.Input == DenormalMode::Dynamic || 4020 Mode.Output == DenormalMode::Dynamic) 4021 KnownFPClasses |= fcPosZero; 4022 } 4023 } 4024 4025 void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src, 4026 const Function &F, Type *Ty) { 4027 propagateDenormal(Src, F, Ty); 4028 propagateNaN(Src, /*PreserveSign=*/true); 4029 } 4030 4031 /// Returns a pair of values, which if passed to llvm.is.fpclass, returns the 4032 /// same result as an fcmp with the given operands. 
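/// e.g. (fcmp olt %x, 0.0) corresponds to the class test
///   llvm.is.fpclass(%x, fcNegSubnormal | fcNegNormal | fcNegInf)
/// as handled by the FCMP_OLT case below.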
4033 std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, 4034 const Function &F, 4035 Value *LHS, Value *RHS, 4036 bool LookThroughSrc) { 4037 const APFloat *ConstRHS; 4038 if (!match(RHS, m_APFloatAllowUndef(ConstRHS))) 4039 return {nullptr, fcAllFlags}; 4040 4041 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); 4042 } 4043 4044 std::pair<Value *, FPClassTest> 4045 llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, 4046 const APFloat *ConstRHS, bool LookThroughSrc) { 4047 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan 4048 if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN()) 4049 return {LHS, ~fcNan}; 4050 4051 // fcmp uno x, zero|normal|subnormal|inf -> fcNan 4052 if (Pred == FCmpInst::FCMP_UNO && !ConstRHS->isNaN()) 4053 return {LHS, fcNan}; 4054 4055 if (ConstRHS->isZero()) { 4056 // Compares with fcNone are only exactly equal to fcZero if input denormals 4057 // are not flushed. 4058 // TODO: Handle DAZ by expanding masks to cover subnormal cases. 4059 if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO && 4060 !inputDenormalIsIEEE(F, LHS->getType())) 4061 return {nullptr, fcAllFlags}; 4062 4063 switch (Pred) { 4064 case FCmpInst::FCMP_OEQ: // Match x == 0.0 4065 return {LHS, fcZero}; 4066 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0) 4067 return {LHS, fcZero | fcNan}; 4068 case FCmpInst::FCMP_UNE: // Match (x != 0.0) 4069 return {LHS, ~fcZero}; 4070 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0 4071 return {LHS, ~fcNan & ~fcZero}; 4072 case FCmpInst::FCMP_ORD: 4073 // Canonical form of ord/uno is with a zero. We could also handle 4074 // non-canonical other non-NaN constants or LHS == RHS. 4075 return {LHS, ~fcNan}; 4076 case FCmpInst::FCMP_UNO: 4077 return {LHS, fcNan}; 4078 case FCmpInst::FCMP_OGT: // x > 0 4079 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf}; 4080 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0 4081 return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan}; 4082 case FCmpInst::FCMP_OGE: // x >= 0 4083 return {LHS, fcPositive | fcNegZero}; 4084 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0 4085 return {LHS, fcPositive | fcNegZero | fcNan}; 4086 case FCmpInst::FCMP_OLT: // x < 0 4087 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf}; 4088 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0 4089 return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan}; 4090 case FCmpInst::FCMP_OLE: // x <= 0 4091 return {LHS, fcNegative | fcPosZero}; 4092 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0 4093 return {LHS, fcNegative | fcPosZero | fcNan}; 4094 default: 4095 break; 4096 } 4097 4098 return {nullptr, fcAllFlags}; 4099 } 4100 4101 Value *Src = LHS; 4102 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src))); 4103 4104 // Compute the test mask that would return true for the ordered comparisons. 
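  // e.g. for (fcmp olt x, +inf) the ordered mask computed below is
  // fcFinite | fcNegInf; the unordered twin (fcmp ult x, +inf) reuses it via
  // the final "Mask = ~Mask" inversion.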
  FPClassTest Mask;

  if (ConstRHS->isInfinity()) {
    switch (Pred) {
    case FCmpInst::FCMP_OEQ:
    case FCmpInst::FCMP_UNE: {
      // Match __builtin_isinf patterns
      //
      //   fcmp oeq x, +inf -> is_fpclass x, fcPosInf
      //   fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
      //   fcmp oeq x, -inf -> is_fpclass x, fcNegInf
      //   fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
      //
      //   fcmp une x, +inf -> is_fpclass x, ~fcPosInf
      //   fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
      //   fcmp une x, -inf -> is_fpclass x, ~fcNegInf
      //   fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true

      if (ConstRHS->isNegative()) {
        Mask = fcNegInf;
        if (IsFabs)
          Mask = fcNone;
      } else {
        Mask = fcPosInf;
        if (IsFabs)
          Mask |= fcNegInf;
      }

      break;
    }
    case FCmpInst::FCMP_ONE:
    case FCmpInst::FCMP_UEQ: {
      // Match __builtin_isinf patterns
      //   fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
      //   fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
      //   fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
      //   fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
      //
      //   fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
      //   fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
      //   fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
      //   fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
      if (ConstRHS->isNegative()) {
        Mask = ~fcNegInf & ~fcNan;
        if (IsFabs)
          Mask = ~fcNan;
      } else {
        Mask = ~fcPosInf & ~fcNan;
        if (IsFabs)
          Mask &= ~fcNegInf;
      }

      break;
    }
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_UGE: {
      if (ConstRHS->isNegative()) {
        // No value is ordered and less than negative infinity.
        // All values are unordered with or at least negative infinity.
        // fcmp olt x, -inf -> false
        // fcmp uge x, -inf -> true
        Mask = fcNone;
        break;
      }

      // fcmp olt fabs(x), +inf -> fcFinite
      // fcmp uge fabs(x), +inf -> ~fcFinite
      // fcmp olt x, +inf -> fcFinite|fcNegInf
      // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
      Mask = fcFinite;
      if (!IsFabs)
        Mask |= fcNegInf;
      break;
    }
    case FCmpInst::FCMP_OGE:
    case FCmpInst::FCMP_ULT: {
      if (ConstRHS->isNegative()) {
        // fcmp oge x, -inf -> ~fcNan
        // fcmp oge fabs(x), -inf -> ~fcNan
        // fcmp ult x, -inf -> fcNan
        // fcmp ult fabs(x), -inf -> fcNan
        Mask = ~fcNan;
        break;
      }

      // fcmp oge fabs(x), +inf -> fcInf
      // fcmp oge x, +inf -> fcPosInf
      // fcmp ult fabs(x), +inf -> ~fcInf
      // fcmp ult x, +inf -> ~fcPosInf
      Mask = fcPosInf;
      if (IsFabs)
        Mask |= fcNegInf;
      break;
    }
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_ULE: {
      if (ConstRHS->isNegative()) {
        // fcmp ogt x, -inf -> fcmp one x, -inf
        // fcmp ogt fabs(x), -inf -> fcmp ord x, x
        // fcmp ule x, -inf -> fcmp ueq x, -inf
        // fcmp ule fabs(x), -inf -> fcmp uno x, x
        Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
        break;
      }

      // No value is ordered and greater than infinity.
      Mask = fcNone;
      break;
    }
    default:
      return {nullptr, fcAllFlags};
    }
  } else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) {
    // Match the pattern that's used in __builtin_isnormal.
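    // e.g. one common expansion of __builtin_isnormal(x) contains
    //   %c = fcmp oge float %fabs_x, 0x3810000000000000  ; smallest normal
    // which, when %fabs_x is a fabs of %x, the FCMP_OGE case below turns
    // into is_fpclass(%x, fcInf | fcNormal).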
4219 switch (Pred) { 4220 case FCmpInst::FCMP_OLT: 4221 case FCmpInst::FCMP_UGE: { 4222 // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero 4223 // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero 4224 // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf 4225 // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero) 4226 Mask = fcZero | fcSubnormal; 4227 if (!IsFabs) 4228 Mask |= fcNegNormal | fcNegInf; 4229 4230 break; 4231 } 4232 case FCmpInst::FCMP_OGE: 4233 case FCmpInst::FCMP_ULT: { 4234 // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf 4235 // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal 4236 // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf) 4237 // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal) 4238 Mask = fcPosInf | fcPosNormal; 4239 if (IsFabs) 4240 Mask |= fcNegInf | fcNegNormal; 4241 break; 4242 } 4243 default: 4244 return {nullptr, fcAllFlags}; 4245 } 4246 } else if (ConstRHS->isNaN()) { 4247 // fcmp o__ x, nan -> false 4248 // fcmp u__ x, nan -> true 4249 Mask = fcNone; 4250 } else 4251 return {nullptr, fcAllFlags}; 4252 4253 // Invert the comparison for the unordered cases. 4254 if (FCmpInst::isUnordered(Pred)) 4255 Mask = ~Mask; 4256 4257 return {Src, Mask}; 4258 } 4259 4260 static FPClassTest computeKnownFPClassFromAssumes(const Value *V, 4261 const SimplifyQuery &Q) { 4262 FPClassTest KnownFromAssume = fcAllFlags; 4263 4264 // Try to restrict the floating-point classes based on information from 4265 // assumptions. 4266 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { 4267 if (!AssumeVH) 4268 continue; 4269 CallInst *I = cast<CallInst>(AssumeVH); 4270 const Function *F = I->getFunction(); 4271 4272 assert(F == Q.CxtI->getParent()->getParent() && 4273 "Got assumption for the wrong function!"); 4274 assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && 4275 "must be an assume intrinsic"); 4276 4277 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) 4278 continue; 4279 4280 CmpInst::Predicate Pred; 4281 Value *LHS, *RHS; 4282 uint64_t ClassVal = 0; 4283 if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) { 4284 auto [TestedValue, TestedMask] = 4285 fcmpToClassTest(Pred, *F, LHS, RHS, true); 4286 // First see if we can fold in fabs/fneg into the test. 4287 if (TestedValue == V) 4288 KnownFromAssume &= TestedMask; 4289 else { 4290 // Try again without the lookthrough if we found a different source 4291 // value. 4292 auto [TestedValue, TestedMask] = 4293 fcmpToClassTest(Pred, *F, LHS, RHS, false); 4294 if (TestedValue == V) 4295 KnownFromAssume &= TestedMask; 4296 } 4297 } else if (match(I->getArgOperand(0), 4298 m_Intrinsic<Intrinsic::is_fpclass>( 4299 m_Value(LHS), m_ConstantInt(ClassVal)))) { 4300 KnownFromAssume &= static_cast<FPClassTest>(ClassVal); 4301 } 4302 } 4303 4304 return KnownFromAssume; 4305 } 4306 4307 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4308 FPClassTest InterestedClasses, KnownFPClass &Known, 4309 unsigned Depth, const SimplifyQuery &Q); 4310 4311 static void computeKnownFPClass(const Value *V, KnownFPClass &Known, 4312 FPClassTest InterestedClasses, unsigned Depth, 4313 const SimplifyQuery &Q) { 4314 auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); 4315 APInt DemandedElts = 4316 FVTy ? 
APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); 4317 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q); 4318 } 4319 4320 static void computeKnownFPClassForFPTrunc(const Operator *Op, 4321 const APInt &DemandedElts, 4322 FPClassTest InterestedClasses, 4323 KnownFPClass &Known, unsigned Depth, 4324 const SimplifyQuery &Q) { 4325 if ((InterestedClasses & 4326 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone) 4327 return; 4328 4329 KnownFPClass KnownSrc; 4330 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4331 KnownSrc, Depth + 1, Q); 4332 4333 // Sign should be preserved 4334 // TODO: Handle cannot be ordered greater than zero 4335 if (KnownSrc.cannotBeOrderedLessThanZero()) 4336 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4337 4338 Known.propagateNaN(KnownSrc, true); 4339 4340 // Infinity needs a range check. 4341 } 4342 4343 // TODO: Merge implementation of cannotBeOrderedLessThanZero into here. 4344 void computeKnownFPClass(const Value *V, const APInt &DemandedElts, 4345 FPClassTest InterestedClasses, KnownFPClass &Known, 4346 unsigned Depth, const SimplifyQuery &Q) { 4347 assert(Known.isUnknown() && "should not be called with known information"); 4348 4349 if (!DemandedElts) { 4350 // No demanded elts, better to assume we don't know anything. 4351 Known.resetAll(); 4352 return; 4353 } 4354 4355 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 4356 4357 if (auto *CFP = dyn_cast_or_null<ConstantFP>(V)) { 4358 Known.KnownFPClasses = CFP->getValueAPF().classify(); 4359 Known.SignBit = CFP->isNegative(); 4360 return; 4361 } 4362 4363 // Try to handle fixed width vector constants 4364 auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); 4365 const Constant *CV = dyn_cast<Constant>(V); 4366 if (VFVTy && CV) { 4367 Known.KnownFPClasses = fcNone; 4368 4369 // For vectors, verify that each element is not NaN. 4370 unsigned NumElts = VFVTy->getNumElements(); 4371 for (unsigned i = 0; i != NumElts; ++i) { 4372 Constant *Elt = CV->getAggregateElement(i); 4373 if (!Elt) { 4374 Known = KnownFPClass(); 4375 return; 4376 } 4377 if (isa<UndefValue>(Elt)) 4378 continue; 4379 auto *CElt = dyn_cast<ConstantFP>(Elt); 4380 if (!CElt) { 4381 Known = KnownFPClass(); 4382 return; 4383 } 4384 4385 KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()}; 4386 Known |= KnownElt; 4387 } 4388 4389 return; 4390 } 4391 4392 FPClassTest KnownNotFromFlags = fcNone; 4393 if (const auto *CB = dyn_cast<CallBase>(V)) 4394 KnownNotFromFlags |= CB->getRetNoFPClass(); 4395 else if (const auto *Arg = dyn_cast<Argument>(V)) 4396 KnownNotFromFlags |= Arg->getNoFPClass(); 4397 4398 const Operator *Op = dyn_cast<Operator>(V); 4399 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) { 4400 if (FPOp->hasNoNaNs()) 4401 KnownNotFromFlags |= fcNan; 4402 if (FPOp->hasNoInfs()) 4403 KnownNotFromFlags |= fcInf; 4404 } 4405 4406 if (Q.AC) { 4407 FPClassTest AssumedClasses = computeKnownFPClassFromAssumes(V, Q); 4408 KnownNotFromFlags |= ~AssumedClasses; 4409 } 4410 4411 // We no longer need to find out about these bits from inputs if we can 4412 // assume this from flags/attributes. 4413 InterestedClasses &= ~KnownNotFromFlags; 4414 4415 auto ClearClassesFromFlags = make_scope_exit([=, &Known] { 4416 Known.knownNot(KnownNotFromFlags); 4417 }); 4418 4419 if (!Op) 4420 return; 4421 4422 // All recursive calls that increase depth must come after this. 
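  // e.g. a call marked "nnan ninf" still reports ~(fcNan | fcInf) even when
  // nothing below learns anything else: the scope exit above applies
  // KnownNotFromFlags on every return path.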
4423 if (Depth == MaxAnalysisRecursionDepth) 4424 return; 4425 4426 const unsigned Opc = Op->getOpcode(); 4427 switch (Opc) { 4428 case Instruction::FNeg: { 4429 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, 4430 Known, Depth + 1, Q); 4431 Known.fneg(); 4432 break; 4433 } 4434 case Instruction::Select: { 4435 Value *Cond = Op->getOperand(0); 4436 Value *LHS = Op->getOperand(1); 4437 Value *RHS = Op->getOperand(2); 4438 4439 FPClassTest FilterLHS = fcAllFlags; 4440 FPClassTest FilterRHS = fcAllFlags; 4441 4442 Value *TestedValue = nullptr; 4443 FPClassTest TestedMask = fcNone; 4444 uint64_t ClassVal = 0; 4445 const Function *F = cast<Instruction>(Op)->getFunction(); 4446 CmpInst::Predicate Pred; 4447 Value *CmpLHS, *CmpRHS; 4448 if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) { 4449 // If the select filters out a value based on the class, it no longer 4450 // participates in the class of the result 4451 4452 // TODO: In some degenerate cases we can infer something if we try again 4453 // without looking through sign operations. 4454 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; 4455 std::tie(TestedValue, TestedMask) = 4456 fcmpToClassTest(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg); 4457 } else if (match(Cond, 4458 m_Intrinsic<Intrinsic::is_fpclass>( 4459 m_Value(TestedValue), m_ConstantInt(ClassVal)))) { 4460 TestedMask = static_cast<FPClassTest>(ClassVal); 4461 } 4462 4463 if (TestedValue == LHS) { 4464 // match !isnan(x) ? x : y 4465 FilterLHS = TestedMask; 4466 } else if (TestedValue == RHS) { 4467 // match !isnan(x) ? y : x 4468 FilterRHS = ~TestedMask; 4469 } 4470 4471 KnownFPClass Known2; 4472 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, 4473 Depth + 1, Q); 4474 Known.KnownFPClasses &= FilterLHS; 4475 4476 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, 4477 Known2, Depth + 1, Q); 4478 Known2.KnownFPClasses &= FilterRHS; 4479 4480 Known |= Known2; 4481 break; 4482 } 4483 case Instruction::Call: { 4484 const CallInst *II = cast<CallInst>(Op); 4485 const Intrinsic::ID IID = II->getIntrinsicID(); 4486 switch (IID) { 4487 case Intrinsic::fabs: { 4488 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { 4489 // If we only care about the sign bit we don't need to inspect the 4490 // operand. 4491 computeKnownFPClass(II->getArgOperand(0), DemandedElts, 4492 InterestedClasses, Known, Depth + 1, Q); 4493 } 4494 4495 Known.fabs(); 4496 break; 4497 } 4498 case Intrinsic::copysign: { 4499 KnownFPClass KnownSign; 4500 4501 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4502 Known, Depth + 1, Q); 4503 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4504 KnownSign, Depth + 1, Q); 4505 Known.copysign(KnownSign); 4506 break; 4507 } 4508 case Intrinsic::fma: 4509 case Intrinsic::fmuladd: { 4510 if ((InterestedClasses & fcNegative) == fcNone) 4511 break; 4512 4513 if (II->getArgOperand(0) != II->getArgOperand(1)) 4514 break; 4515 4516 // The multiply cannot be -0 and therefore the add can't be -0 4517 Known.knownNot(fcNegZero); 4518 4519 // x * x + y is non-negative if y is non-negative. 
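      // e.g. (illustrative) for
      //   %r = call float @llvm.fmuladd.f32(float %x, float %x, float %y)
      // a %y that cannot be ordered-less-than-zero lets us clear fcNegative
      // below; x*x itself is never in fcNegative (a NaN result is tracked
      // separately).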
4520 KnownFPClass KnownAddend; 4521 computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, 4522 KnownAddend, Depth + 1, Q); 4523 4524 // TODO: Known sign bit with no nans 4525 if (KnownAddend.cannotBeOrderedLessThanZero()) 4526 Known.knownNot(fcNegative); 4527 break; 4528 } 4529 case Intrinsic::sqrt: 4530 case Intrinsic::experimental_constrained_sqrt: { 4531 KnownFPClass KnownSrc; 4532 FPClassTest InterestedSrcs = InterestedClasses; 4533 if (InterestedClasses & fcNan) 4534 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4535 4536 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4537 KnownSrc, Depth + 1, Q); 4538 4539 if (KnownSrc.isKnownNeverPosInfinity()) 4540 Known.knownNot(fcPosInf); 4541 if (KnownSrc.isKnownNever(fcSNan)) 4542 Known.knownNot(fcSNan); 4543 4544 // Any negative value besides -0 returns a nan. 4545 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4546 Known.knownNot(fcNan); 4547 4548 // The only negative value that can be returned is -0 for -0 inputs. 4549 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); 4550 4551 // If the input denormal mode could be PreserveSign, a negative 4552 // subnormal input could produce a negative zero output. 4553 const Function *F = II->getFunction(); 4554 if (Q.IIQ.hasNoSignedZeros(II) || 4555 (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { 4556 Known.knownNot(fcNegZero); 4557 if (KnownSrc.isKnownNeverNaN()) 4558 Known.SignBit = false; 4559 } 4560 4561 break; 4562 } 4563 case Intrinsic::sin: 4564 case Intrinsic::cos: { 4565 // Return NaN on infinite inputs. 4566 KnownFPClass KnownSrc; 4567 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4568 KnownSrc, Depth + 1, Q); 4569 Known.knownNot(fcInf); 4570 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) 4571 Known.knownNot(fcNan); 4572 break; 4573 } 4574 case Intrinsic::maxnum: 4575 case Intrinsic::minnum: 4576 case Intrinsic::minimum: 4577 case Intrinsic::maximum: { 4578 KnownFPClass KnownLHS, KnownRHS; 4579 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4580 KnownLHS, Depth + 1, Q); 4581 computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, 4582 KnownRHS, Depth + 1, Q); 4583 4584 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); 4585 Known = KnownLHS | KnownRHS; 4586 4587 // If either operand is not NaN, the result is not NaN. 4588 if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) 4589 Known.knownNot(fcNan); 4590 4591 if (IID == Intrinsic::maxnum) { 4592 // If at least one operand is known to be positive, the result must be 4593 // positive. 4594 if ((KnownLHS.cannotBeOrderedLessThanZero() && 4595 KnownLHS.isKnownNeverNaN()) || 4596 (KnownRHS.cannotBeOrderedLessThanZero() && 4597 KnownRHS.isKnownNeverNaN())) 4598 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4599 } else if (IID == Intrinsic::maximum) { 4600 // If at least one operand is known to be positive, the result must be 4601 // positive. 4602 if (KnownLHS.cannotBeOrderedLessThanZero() || 4603 KnownRHS.cannotBeOrderedLessThanZero()) 4604 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4605 } else if (IID == Intrinsic::minnum) { 4606 // If at least one operand is known to be negative, the result must be 4607 // negative. 
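        // e.g. minnum(x, -1.0) returns -1.0 when x is NaN and otherwise a
        // value <= -1.0, so it can never be ordered greater than zero.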
4608 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
4609 KnownLHS.isKnownNeverNaN()) ||
4610 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
4611 KnownRHS.isKnownNeverNaN()))
4612 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
4613 } else {
4614 // If at least one operand is known to be non-positive, the result must
4615 // be non-positive.
4616 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
4617 KnownRHS.cannotBeOrderedGreaterThanZero())
4618 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
4619 }
4620
4621 // Fixup zero handling if denormals could be returned as a zero.
4622 //
4623 // As there's no spec for denormal flushing, be conservative with the
4624 // treatment of denormals that could be flushed to zero. For older
4625 // subtargets on AMDGPU the min/max instructions would not flush the
4626 // output and return the original value.
4627 //
4628 // TODO: This could be refined based on the sign
4629 if ((Known.KnownFPClasses & fcZero) != fcNone &&
4630 !Known.isKnownNeverSubnormal()) {
4631 const Function *Parent = II->getFunction();
4632 if (!Parent)
4633 break;
4634
4635 DenormalMode Mode = Parent->getDenormalMode(
4636 II->getType()->getScalarType()->getFltSemantics());
4637 if (Mode != DenormalMode::getIEEE())
4638 Known.KnownFPClasses |= fcZero;
4639 }
4640
4641 break;
4642 }
4643 case Intrinsic::canonicalize: {
4644 KnownFPClass KnownSrc;
4645 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
4646 KnownSrc, Depth + 1, Q);
4647
4648 // This is essentially a stronger form of
4649 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
4650 // actually have an IR canonicalization guarantee.
4651
4652 // Canonicalize may flush denormals to zero, so we have to consider the
4653 // denormal mode to preserve known-not-0 knowledge.
4654 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
4655
4656 // Stronger version of propagateNaN
4657 // Canonicalize is guaranteed to quiet signaling nans.
4658 if (KnownSrc.isKnownNeverNaN())
4659 Known.knownNot(fcNan);
4660 else
4661 Known.knownNot(fcSNan);
4662
4663 const Function *F = II->getFunction();
4664 if (!F)
4665 break;
4666
4667 // If the parent function flushes denormals, the canonical output cannot
4668 // be a denormal.
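// For example, under "denormal-fp-math"="preserve-sign,preserve-sign" the
// smallest positive f32 denormal may be canonicalized to +0.0; only the
// IEEE mode handled first below lets known-not-zero facts survive.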
4669 const fltSemantics &FPType = 4670 II->getType()->getScalarType()->getFltSemantics(); 4671 DenormalMode DenormMode = F->getDenormalMode(FPType); 4672 if (DenormMode == DenormalMode::getIEEE()) { 4673 if (KnownSrc.isKnownNever(fcPosZero)) 4674 Known.knownNot(fcPosZero); 4675 if (KnownSrc.isKnownNever(fcNegZero)) 4676 Known.knownNot(fcNegZero); 4677 break; 4678 } 4679 4680 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) 4681 Known.knownNot(fcSubnormal); 4682 4683 if (DenormMode.Input == DenormalMode::PositiveZero || 4684 (DenormMode.Output == DenormalMode::PositiveZero && 4685 DenormMode.Input == DenormalMode::IEEE)) 4686 Known.knownNot(fcNegZero); 4687 4688 break; 4689 } 4690 case Intrinsic::trunc: 4691 case Intrinsic::floor: 4692 case Intrinsic::ceil: 4693 case Intrinsic::rint: 4694 case Intrinsic::nearbyint: 4695 case Intrinsic::round: 4696 case Intrinsic::roundeven: { 4697 KnownFPClass KnownSrc; 4698 FPClassTest InterestedSrcs = InterestedClasses; 4699 if (InterestedSrcs & fcPosFinite) 4700 InterestedSrcs |= fcPosFinite; 4701 if (InterestedSrcs & fcNegFinite) 4702 InterestedSrcs |= fcNegFinite; 4703 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4704 KnownSrc, Depth + 1, Q); 4705 4706 // Integer results cannot be subnormal. 4707 Known.knownNot(fcSubnormal); 4708 4709 Known.propagateNaN(KnownSrc, true); 4710 4711 // Pass through infinities, except PPC_FP128 is a special case for 4712 // intrinsics other than trunc. 4713 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { 4714 if (KnownSrc.isKnownNeverPosInfinity()) 4715 Known.knownNot(fcPosInf); 4716 if (KnownSrc.isKnownNeverNegInfinity()) 4717 Known.knownNot(fcNegInf); 4718 } 4719 4720 // Negative round ups to 0 produce -0 4721 if (KnownSrc.isKnownNever(fcPosFinite)) 4722 Known.knownNot(fcPosFinite); 4723 if (KnownSrc.isKnownNever(fcNegFinite)) 4724 Known.knownNot(fcNegFinite); 4725 4726 break; 4727 } 4728 case Intrinsic::exp: 4729 case Intrinsic::exp2: 4730 case Intrinsic::exp10: { 4731 Known.knownNot(fcNegative); 4732 if ((InterestedClasses & fcNan) == fcNone) 4733 break; 4734 4735 KnownFPClass KnownSrc; 4736 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4737 KnownSrc, Depth + 1, Q); 4738 if (KnownSrc.isKnownNeverNaN()) { 4739 Known.knownNot(fcNan); 4740 Known.SignBit = false; 4741 } 4742 4743 break; 4744 } 4745 case Intrinsic::fptrunc_round: { 4746 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, 4747 Depth, Q); 4748 break; 4749 } 4750 case Intrinsic::log: 4751 case Intrinsic::log10: 4752 case Intrinsic::log2: 4753 case Intrinsic::experimental_constrained_log: 4754 case Intrinsic::experimental_constrained_log10: 4755 case Intrinsic::experimental_constrained_log2: { 4756 // log(+inf) -> +inf 4757 // log([+-]0.0) -> -inf 4758 // log(-inf) -> nan 4759 // log(-x) -> nan 4760 if ((InterestedClasses & (fcNan | fcInf)) == fcNone) 4761 break; 4762 4763 FPClassTest InterestedSrcs = InterestedClasses; 4764 if ((InterestedClasses & fcNegInf) != fcNone) 4765 InterestedSrcs |= fcZero | fcSubnormal; 4766 if ((InterestedClasses & fcNan) != fcNone) 4767 InterestedSrcs |= fcNan | (fcNegative & ~fcNan); 4768 4769 KnownFPClass KnownSrc; 4770 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, 4771 KnownSrc, Depth + 1, Q); 4772 4773 if (KnownSrc.isKnownNeverPosInfinity()) 4774 Known.knownNot(fcPosInf); 4775 4776 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 4777 Known.knownNot(fcNan); 4778 4779 
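// A zero input (or a denormal input flushed to zero under the function's
// denormal mode) gives log(0) == -inf; the logical-zero query below
// accounts for that flushing.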
const Function *F = II->getFunction();
4780 if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
4781 Known.knownNot(fcNegInf);
4782
4783 break;
4784 }
4785 case Intrinsic::powi: {
4786 if ((InterestedClasses & fcNegative) == fcNone)
4787 break;
4788
4789 const Value *Exp = II->getArgOperand(1);
4790 Type *ExpTy = Exp->getType();
4791 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
4792 KnownBits ExponentKnownBits(BitWidth);
4793 computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
4794 ExponentKnownBits, Depth + 1, Q);
4795
4796 if (ExponentKnownBits.Zero[0]) { // Is even
4797 Known.knownNot(fcNegative);
4798 break;
4799 }
4800
4801 // Given that exp is an integer, here are the
4802 // ways that pow can return a negative value:
4803 //
4804 // pow(x, exp) --> negative if exp is odd and x is negative.
4805 // pow(-0, exp) --> -inf if exp is negative odd.
4806 // pow(-0, exp) --> -0 if exp is positive odd.
4807 // pow(-inf, exp) --> -0 if exp is negative odd.
4808 // pow(-inf, exp) --> -inf if exp is positive odd.
4809 KnownFPClass KnownSrc;
4810 computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
4811 KnownSrc, Depth + 1, Q);
4812 if (KnownSrc.isKnownNever(fcNegative))
4813 Known.knownNot(fcNegative);
4814 break;
4815 }
4816 case Intrinsic::ldexp: {
4817 KnownFPClass KnownSrc;
4818 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
4819 KnownSrc, Depth + 1, Q);
4820 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);
4821
4822 // Sign is preserved, but underflows may produce zeroes.
4823 if (KnownSrc.isKnownNever(fcNegative))
4824 Known.knownNot(fcNegative);
4825 else if (KnownSrc.cannotBeOrderedLessThanZero())
4826 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
4827
4828 if (KnownSrc.isKnownNever(fcPositive))
4829 Known.knownNot(fcPositive);
4830 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
4831 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
4832
4833 // Can refine inf/zero handling based on the exponent operand.
4834 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
4835 if ((InterestedClasses & ExpInfoMask) == fcNone)
4836 break;
4837 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
4838 break;
4839
4840 const fltSemantics &Flt =
4841 II->getType()->getScalarType()->getFltSemantics();
4842 unsigned Precision = APFloat::semanticsPrecision(Flt);
4843 const Value *ExpArg = II->getArgOperand(1);
4844 ConstantRange ExpRange = computeConstantRange(
4845 ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
4846
4847 const int MantissaBits = Precision - 1;
4848 if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
4849 Known.knownNot(fcSubnormal);
4850
4851 const Function *F = II->getFunction();
4852 const APInt *ConstVal = ExpRange.getSingleElement();
4853 if (ConstVal && ConstVal->isZero()) {
4854 // ldexp(x, 0) -> x, so propagate everything.
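// (Worked case, for illustration: for f32 the precision is 24, so once the
// exponent is known to be >= 23, even the smallest subnormal, 2^-149, is
// scaled to at least 2^-126, the smallest normal; hence the fcSubnormal
// clearing above.)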
4855 Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType()); 4856 } else if (ExpRange.isAllNegative()) { 4857 // If we know the power is <= 0, can't introduce inf 4858 if (KnownSrc.isKnownNeverPosInfinity()) 4859 Known.knownNot(fcPosInf); 4860 if (KnownSrc.isKnownNeverNegInfinity()) 4861 Known.knownNot(fcNegInf); 4862 } else if (ExpRange.isAllNonNegative()) { 4863 // If we know the power is >= 0, can't introduce subnormal or zero 4864 if (KnownSrc.isKnownNeverPosSubnormal()) 4865 Known.knownNot(fcPosSubnormal); 4866 if (KnownSrc.isKnownNeverNegSubnormal()) 4867 Known.knownNot(fcNegSubnormal); 4868 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) 4869 Known.knownNot(fcPosZero); 4870 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) 4871 Known.knownNot(fcNegZero); 4872 } 4873 4874 break; 4875 } 4876 case Intrinsic::arithmetic_fence: { 4877 computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, 4878 Known, Depth + 1, Q); 4879 break; 4880 } 4881 case Intrinsic::experimental_constrained_sitofp: 4882 case Intrinsic::experimental_constrained_uitofp: 4883 // Cannot produce nan 4884 Known.knownNot(fcNan); 4885 4886 // sitofp and uitofp turn into +0.0 for zero. 4887 Known.knownNot(fcNegZero); 4888 4889 // Integers cannot be subnormal 4890 Known.knownNot(fcSubnormal); 4891 4892 if (IID == Intrinsic::experimental_constrained_uitofp) 4893 Known.signBitMustBeZero(); 4894 4895 // TODO: Copy inf handling from instructions 4896 break; 4897 default: 4898 break; 4899 } 4900 4901 break; 4902 } 4903 case Instruction::FAdd: 4904 case Instruction::FSub: { 4905 KnownFPClass KnownLHS, KnownRHS; 4906 bool WantNegative = 4907 Op->getOpcode() == Instruction::FAdd && 4908 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; 4909 bool WantNaN = (InterestedClasses & fcNan) != fcNone; 4910 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; 4911 4912 if (!WantNaN && !WantNegative && !WantNegZero) 4913 break; 4914 4915 FPClassTest InterestedSrcs = InterestedClasses; 4916 if (WantNegative) 4917 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 4918 if (InterestedClasses & fcNan) 4919 InterestedSrcs |= fcInf; 4920 computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs, 4921 KnownRHS, Depth + 1, Q); 4922 4923 if ((WantNaN && KnownRHS.isKnownNeverNaN()) || 4924 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || 4925 WantNegZero || Opc == Instruction::FSub) { 4926 4927 // RHS is canonically cheaper to compute. Skip inspecting the LHS if 4928 // there's no point. 4929 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs, 4930 KnownLHS, Depth + 1, Q); 4931 // Adding positive and negative infinity produces NaN. 4932 // TODO: Check sign of infinities. 4933 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 4934 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) 4935 Known.knownNot(fcNan); 4936 4937 // FIXME: Context function should always be passed in separately 4938 const Function *F = cast<Instruction>(Op)->getFunction(); 4939 4940 if (Op->getOpcode() == Instruction::FAdd) { 4941 if (KnownLHS.cannotBeOrderedLessThanZero() && 4942 KnownRHS.cannotBeOrderedLessThanZero()) 4943 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 4944 if (!F) 4945 break; 4946 4947 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. 
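// (Under IEEE-754 round-to-nearest, x + y is -0.0 only when both x and y
// are -0.0, which is why one operand being known not to be a logical -0.0
// suffices below, together with the output-denormal check.)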
4948 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
4949 KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) &&
4950 // Make sure output negative denormal can't flush to -0
4951 outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
4952 Known.knownNot(fcNegZero);
4953 } else {
4954 if (!F)
4955 break;
4956
4957 // Only fsub -0, +0 can return -0
4958 if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
4959 KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) &&
4960 // Make sure output negative denormal can't flush to -0
4961 outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
4962 Known.knownNot(fcNegZero);
4963 }
4964 }
4965
4966 break;
4967 }
4968 case Instruction::FMul: {
4969 // X * X is always non-negative or a NaN.
4970 if (Op->getOperand(0) == Op->getOperand(1))
4971 Known.knownNot(fcNegative);
4972
4973 if ((InterestedClasses & fcNan) != fcNan)
4974 break;
4975
4976 // fcSubnormal is only needed in case of DAZ.
4977 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
4978
4979 KnownFPClass KnownLHS, KnownRHS;
4980 computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
4981 Depth + 1, Q);
4982 if (!KnownRHS.isKnownNeverNaN())
4983 break;
4984
4985 computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
4986 Depth + 1, Q);
4987 if (!KnownLHS.isKnownNeverNaN())
4988 break;
4989
4990 // 0 * +/-inf produces NaN.
4991 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
4992 Known.knownNot(fcNan);
4993 break;
4994 }
4995
4996 const Function *F = cast<Instruction>(Op)->getFunction();
4997 if (!F)
4998 break;
4999
5000 if ((KnownRHS.isKnownNeverInfinity() ||
5001 KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) &&
5002 (KnownLHS.isKnownNeverInfinity() ||
5003 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))
5004 Known.knownNot(fcNan);
5005
5006 break;
5007 }
5008 case Instruction::FDiv:
5009 case Instruction::FRem: {
5010 if (Op->getOperand(0) == Op->getOperand(1)) {
5011 // TODO: Could filter out snan if we inspect the operand
5012 if (Op->getOpcode() == Instruction::FDiv) {
5013 // X / X is always exactly 1.0 or a NaN.
5014 Known.KnownFPClasses = fcNan | fcPosNormal;
5015 } else {
5016 // X % X is always exactly [+-]0.0 or a NaN.
5017 Known.KnownFPClasses = fcNan | fcZero;
5018 }
5019
5020 break;
5021 }
5022
5023 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
5024 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
5025 const bool WantPositive =
5026 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
5027 if (!WantNan && !WantNegative && !WantPositive)
5028 break;
5029
5030 KnownFPClass KnownLHS, KnownRHS;
5031
5032 computeKnownFPClass(Op->getOperand(1), DemandedElts,
5033 fcNan | fcInf | fcZero | fcNegative, KnownRHS,
5034 Depth + 1, Q);
5035
5036 bool KnowSomethingUseful =
5037 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);
5038
5039 if (KnowSomethingUseful || WantPositive) {
5040 const FPClassTest InterestedLHS =
5041 WantPositive ? fcAllFlags
5042 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
5043
5044 computeKnownFPClass(Op->getOperand(0), DemandedElts,
5045 InterestedClasses & InterestedLHS, KnownLHS,
5046 Depth + 1, Q);
5047 }
5048
5049 const Function *F = cast<Instruction>(Op)->getFunction();
5050
5051 if (Op->getOpcode() == Instruction::FDiv) {
5052 // Only 0/0, Inf/Inf produce NaN.
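// Illustrative (hypothetical IR): %q = fdiv float %x, 2.0 cannot be NaN
// once %x is known never NaN, since the constant divisor rules out both
// 0/0 and inf/inf.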
5053 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5054 (KnownLHS.isKnownNeverInfinity() ||
5055 KnownRHS.isKnownNeverInfinity()) &&
5056 ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) ||
5057 (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) {
5058 Known.knownNot(fcNan);
5059 }
5060
5061 // X / -0.0 is -Inf (or NaN).
5062 // +X / +Y is never negative.
5063 if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative))
5064 Known.knownNot(fcNegative);
5065 } else {
5066 // Inf REM x and x REM 0 produce NaN.
5067 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5068 KnownLHS.isKnownNeverInfinity() && F &&
5069 KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) {
5070 Known.knownNot(fcNan);
5071 }
5072
5073 // The sign for frem is the same as the first operand.
5074 if (KnownLHS.cannotBeOrderedLessThanZero())
5075 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5076 if (KnownLHS.cannotBeOrderedGreaterThanZero())
5077 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5078
5079 // See if we can be more aggressive about the sign of 0.
5080 if (KnownLHS.isKnownNever(fcNegative))
5081 Known.knownNot(fcNegative);
5082 if (KnownLHS.isKnownNever(fcPositive))
5083 Known.knownNot(fcPositive);
5084 }
5085
5086 break;
5087 }
5088 case Instruction::FPExt: {
5089 // Infinity, nan and zero propagate from source.
5090 computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5091 Known, Depth + 1, Q);
5092
5093 const fltSemantics &DstTy =
5094 Op->getType()->getScalarType()->getFltSemantics();
5095 const fltSemantics &SrcTy =
5096 Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();
5097
5098 // All subnormal inputs should be in the normal range in the result type.
5099 if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy))
5100 Known.knownNot(fcSubnormal);
5101
5102 // Sign bit of a nan isn't guaranteed.
5103 if (!Known.isKnownNeverNaN())
5104 Known.SignBit = std::nullopt;
5105 break;
5106 }
5107 case Instruction::FPTrunc: {
5108 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5109 Depth, Q);
5110 break;
5111 }
5112 case Instruction::SIToFP:
5113 case Instruction::UIToFP: {
5114 // Cannot produce nan
5115 Known.knownNot(fcNan);
5116
5117 // Integers cannot be subnormal
5118 Known.knownNot(fcSubnormal);
5119
5120 // sitofp and uitofp turn into +0.0 for zero.
5121 Known.knownNot(fcNegZero);
5122 if (Op->getOpcode() == Instruction::UIToFP)
5123 Known.signBitMustBeZero();
5124
5125 if (InterestedClasses & fcInf) {
5126 // Get width of largest magnitude integer (remove a bit if signed).
5127 // This still works for a signed minimum value because the largest FP
5128 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
5129 int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
5130 if (Op->getOpcode() == Instruction::SIToFP)
5131 --IntSize;
5132
5133 // If the exponent of the largest finite FP value can hold the largest
5134 // integer, the result of the cast must be finite.
5135 Type *FPTy = Op->getType()->getScalarType();
5136 if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
5137 Known.knownNot(fcInf);
5138 }
5139
5140 break;
5141 }
5142 case Instruction::ExtractElement: {
5143 // Look through extract element. If the index is non-constant or
5144 // out-of-range, demand all elements, otherwise just the extracted element.
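// E.g. (hypothetical IR) %e = extractelement <4 x float> %v, i32 2 demands
// only lane 2 of %v, while a variable index demands all four lanes.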
5145 const Value *Vec = Op->getOperand(0); 5146 const Value *Idx = Op->getOperand(1); 5147 auto *CIdx = dyn_cast<ConstantInt>(Idx); 5148 5149 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) { 5150 unsigned NumElts = VecTy->getNumElements(); 5151 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 5152 if (CIdx && CIdx->getValue().ult(NumElts)) 5153 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 5154 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, 5155 Depth + 1, Q); 5156 } 5157 5158 break; 5159 } 5160 case Instruction::InsertElement: { 5161 if (isa<ScalableVectorType>(Op->getType())) 5162 return; 5163 5164 const Value *Vec = Op->getOperand(0); 5165 const Value *Elt = Op->getOperand(1); 5166 auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2)); 5167 // Early out if the index is non-constant or out-of-range. 5168 unsigned NumElts = DemandedElts.getBitWidth(); 5169 if (!CIdx || CIdx->getValue().uge(NumElts)) 5170 return; 5171 5172 unsigned EltIdx = CIdx->getZExtValue(); 5173 // Do we demand the inserted element? 5174 if (DemandedElts[EltIdx]) { 5175 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); 5176 // If we don't know any bits, early out. 5177 if (Known.isUnknown()) 5178 break; 5179 } else { 5180 Known.KnownFPClasses = fcNone; 5181 } 5182 5183 // We don't need the base vector element that has been inserted. 5184 APInt DemandedVecElts = DemandedElts; 5185 DemandedVecElts.clearBit(EltIdx); 5186 if (!!DemandedVecElts) { 5187 KnownFPClass Known2; 5188 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, 5189 Depth + 1, Q); 5190 Known |= Known2; 5191 } 5192 5193 break; 5194 } 5195 case Instruction::ShuffleVector: { 5196 // For undef elements, we don't know anything about the common state of 5197 // the shuffle result. 5198 APInt DemandedLHS, DemandedRHS; 5199 auto *Shuf = dyn_cast<ShuffleVectorInst>(Op); 5200 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) 5201 return; 5202 5203 if (!!DemandedLHS) { 5204 const Value *LHS = Shuf->getOperand(0); 5205 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, 5206 Depth + 1, Q); 5207 5208 // If we don't know any bits, early out. 
5209 if (Known.isUnknown())
5210 break;
5211 } else {
5212 Known.KnownFPClasses = fcNone;
5213 }
5214
5215 if (!!DemandedRHS) {
5216 KnownFPClass Known2;
5217 const Value *RHS = Shuf->getOperand(1);
5218 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
5219 Depth + 1, Q);
5220 Known |= Known2;
5221 }
5222
5223 break;
5224 }
5225 case Instruction::ExtractValue: {
5226 const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
5227 ArrayRef<unsigned> Indices = Extract->getIndices();
5228 const Value *Src = Extract->getAggregateOperand();
5229 if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
5230 Indices[0] == 0) {
5231 if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
5232 switch (II->getIntrinsicID()) {
5233 case Intrinsic::frexp: {
5234 Known.knownNot(fcSubnormal);
5235
5236 KnownFPClass KnownSrc;
5237 computeKnownFPClass(II->getArgOperand(0), DemandedElts,
5238 InterestedClasses, KnownSrc, Depth + 1, Q);
5239
5240 const Function *F = cast<Instruction>(Op)->getFunction();
5241
5242 if (KnownSrc.isKnownNever(fcNegative))
5243 Known.knownNot(fcNegative);
5244 else {
5245 if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType()))
5246 Known.knownNot(fcNegZero);
5247 if (KnownSrc.isKnownNever(fcNegInf))
5248 Known.knownNot(fcNegInf);
5249 }
5250
5251 if (KnownSrc.isKnownNever(fcPositive))
5252 Known.knownNot(fcPositive);
5253 else {
5254 if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType()))
5255 Known.knownNot(fcPosZero);
5256 if (KnownSrc.isKnownNever(fcPosInf))
5257 Known.knownNot(fcPosInf);
5258 }
5259
5260 Known.propagateNaN(KnownSrc);
5261 return;
5262 }
5263 default:
5264 break;
5265 }
5266 }
5267 }
5268
5269 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1,
5270 Q);
5271 break;
5272 }
5273 case Instruction::PHI: {
5274 const PHINode *P = cast<PHINode>(Op);
5275 // Unreachable blocks may have zero-operand PHI nodes.
5276 if (P->getNumIncomingValues() == 0)
5277 break;
5278
5279 // Otherwise take the union of the known FP class sets of the operands,
5280 // taking conservative care to avoid excessive recursion.
5281 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
5282
5283 if (Depth < PhiRecursionLimit) {
5284 // Skip if every incoming value refers to ourselves.
5285 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
5286 break;
5287
5288 bool First = true;
5289
5290 for (Value *IncValue : P->incoming_values()) {
5291 // Skip direct self references.
5292 if (IncValue == P)
5293 continue;
5294
5295 KnownFPClass KnownSrc;
5296 // Recurse, but cap the recursion to two levels, because we don't want
5297 // to waste time spinning around in loops. We need at least depth 2 to
5298 // detect known sign bits.
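// (For instance, with a loop phi
//   %acc = phi float [ 0.0, %entry ], [ %acc.next, %loop ]
// one level is spent on the incoming values and a second on the operands
// of %acc.next; hypothetical IR.)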
5299 computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
5300 PhiRecursionLimit, Q);
5301
5302 if (First) {
5303 Known = KnownSrc;
5304 First = false;
5305 } else {
5306 Known |= KnownSrc;
5307 }
5308
5309 if (Known.KnownFPClasses == fcAllFlags)
5310 break;
5311 }
5312 }
5313
5314 break;
5315 }
5316 default:
5317 break;
5318 }
5319 }
5320
5321 KnownFPClass llvm::computeKnownFPClass(
5322 const Value *V, const APInt &DemandedElts, const DataLayout &DL,
5323 FPClassTest InterestedClasses, unsigned Depth, const TargetLibraryInfo *TLI,
5324 AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
5325 bool UseInstrInfo) {
5326 KnownFPClass KnownClasses;
5327 ::computeKnownFPClass(
5328 V, DemandedElts, InterestedClasses, KnownClasses, Depth,
5329 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
5330 return KnownClasses;
5331 }
5332
5333 KnownFPClass llvm::computeKnownFPClass(
5334 const Value *V, const DataLayout &DL, FPClassTest InterestedClasses,
5335 unsigned Depth, const TargetLibraryInfo *TLI, AssumptionCache *AC,
5336 const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) {
5337 KnownFPClass Known;
5338 ::computeKnownFPClass(
5339 V, Known, InterestedClasses, Depth,
5340 SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
5341 return Known;
5342 }
5343
5344 Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
5345
5346 // All byte-wide stores are splatable, even of arbitrary variables.
5347 if (V->getType()->isIntegerTy(8))
5348 return V;
5349
5350 LLVMContext &Ctx = V->getContext();
5351
5352 // Undefs are don't-cares.
5353 auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
5354 if (isa<UndefValue>(V))
5355 return UndefInt8;
5356
5357 // Return Undef for zero-sized type.
5358 if (DL.getTypeStoreSize(V->getType()).isZero())
5359 return UndefInt8;
5360
5361 Constant *C = dyn_cast<Constant>(V);
5362 if (!C) {
5363 // Conceptually, we could handle things like:
5364 // %a = zext i8 %X to i16
5365 // %b = shl i16 %a, 8
5366 // %c = or i16 %a, %b
5367 // but until there is an example that actually needs this, it doesn't seem
5368 // worth worrying about.
5369 return nullptr;
5370 }
5371
5372 // Handle 'null' ConstantAggregateZero etc.
5373 if (C->isNullValue())
5374 return Constant::getNullValue(Type::getInt8Ty(Ctx));
5375
5376 // Constant floating-point values can be handled as integer values if the
5377 // corresponding integer value is "byteable". An important case is 0.0.
5378 if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
5379 Type *Ty = nullptr;
5380 if (CFP->getType()->isHalfTy())
5381 Ty = Type::getInt16Ty(Ctx);
5382 else if (CFP->getType()->isFloatTy())
5383 Ty = Type::getInt32Ty(Ctx);
5384 else if (CFP->getType()->isDoubleTy())
5385 Ty = Type::getInt64Ty(Ctx);
5386 // Don't handle long double formats, which have strange constraints.
5387 return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
5388 : nullptr;
5389 }
5390
5391 // We can handle constant integers whose width is a multiple of 8 bits.
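// For example, i32 0xAAAAAAAA is the byte 0xAA repeated four times and
// folds to i8 0xAA, while i32 0x01020304 has no repeated byte and yields
// nullptr.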
5392 if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 5393 if (CI->getBitWidth() % 8 == 0) { 5394 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); 5395 if (!CI->getValue().isSplat(8)) 5396 return nullptr; 5397 return ConstantInt::get(Ctx, CI->getValue().trunc(8)); 5398 } 5399 } 5400 5401 if (auto *CE = dyn_cast<ConstantExpr>(C)) { 5402 if (CE->getOpcode() == Instruction::IntToPtr) { 5403 if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { 5404 unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); 5405 if (Constant *Op = ConstantFoldIntegerCast( 5406 CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL)) 5407 return isBytewiseValue(Op, DL); 5408 } 5409 } 5410 } 5411 5412 auto Merge = [&](Value *LHS, Value *RHS) -> Value * { 5413 if (LHS == RHS) 5414 return LHS; 5415 if (!LHS || !RHS) 5416 return nullptr; 5417 if (LHS == UndefInt8) 5418 return RHS; 5419 if (RHS == UndefInt8) 5420 return LHS; 5421 return nullptr; 5422 }; 5423 5424 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) { 5425 Value *Val = UndefInt8; 5426 for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I) 5427 if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL)))) 5428 return nullptr; 5429 return Val; 5430 } 5431 5432 if (isa<ConstantAggregate>(C)) { 5433 Value *Val = UndefInt8; 5434 for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) 5435 if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) 5436 return nullptr; 5437 return Val; 5438 } 5439 5440 // Don't try to handle the handful of other constants. 5441 return nullptr; 5442 } 5443 5444 // This is the recursive version of BuildSubAggregate. It takes a few different 5445 // arguments. Idxs is the index within the nested struct From that we are 5446 // looking at now (which is of type IndexedType). IdxSkip is the number of 5447 // indices from Idxs that should be left out when inserting into the resulting 5448 // struct. To is the result struct built so far, new insertvalue instructions 5449 // build on that. 5450 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, 5451 SmallVectorImpl<unsigned> &Idxs, 5452 unsigned IdxSkip, 5453 Instruction *InsertBefore) { 5454 StructType *STy = dyn_cast<StructType>(IndexedType); 5455 if (STy) { 5456 // Save the original To argument so we can modify it 5457 Value *OrigTo = To; 5458 // General case, the type indexed by Idxs is a struct 5459 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 5460 // Process each struct element recursively 5461 Idxs.push_back(i); 5462 Value *PrevTo = To; 5463 To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, 5464 InsertBefore); 5465 Idxs.pop_back(); 5466 if (!To) { 5467 // Couldn't find any inserted value for this index? Cleanup 5468 while (PrevTo != OrigTo) { 5469 InsertValueInst* Del = cast<InsertValueInst>(PrevTo); 5470 PrevTo = Del->getAggregateOperand(); 5471 Del->eraseFromParent(); 5472 } 5473 // Stop processing elements 5474 break; 5475 } 5476 } 5477 // If we successfully found a value for each of our subaggregates 5478 if (To) 5479 return To; 5480 } 5481 // Base case, the type indexed by SourceIdxs is not a struct, or not all of 5482 // the struct's elements had a value that was inserted directly. In the latter 5483 // case, perhaps we can't determine each of the subelements individually, but 5484 // we might be able to find the complete struct somewhere. 
5485 5486 // Find the value that is at that particular spot 5487 Value *V = FindInsertedValue(From, Idxs); 5488 5489 if (!V) 5490 return nullptr; 5491 5492 // Insert the value in the new (sub) aggregate 5493 return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp", 5494 InsertBefore); 5495 } 5496 5497 // This helper takes a nested struct and extracts a part of it (which is again a 5498 // struct) into a new value. For example, given the struct: 5499 // { a, { b, { c, d }, e } } 5500 // and the indices "1, 1" this returns 5501 // { c, d }. 5502 // 5503 // It does this by inserting an insertvalue for each element in the resulting 5504 // struct, as opposed to just inserting a single struct. This will only work if 5505 // each of the elements of the substruct are known (ie, inserted into From by an 5506 // insertvalue instruction somewhere). 5507 // 5508 // All inserted insertvalue instructions are inserted before InsertBefore 5509 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range, 5510 Instruction *InsertBefore) { 5511 assert(InsertBefore && "Must have someplace to insert!"); 5512 Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), 5513 idx_range); 5514 Value *To = PoisonValue::get(IndexedType); 5515 SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end()); 5516 unsigned IdxSkip = Idxs.size(); 5517 5518 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); 5519 } 5520 5521 /// Given an aggregate and a sequence of indices, see if the scalar value 5522 /// indexed is already around as a register, for example if it was inserted 5523 /// directly into the aggregate. 5524 /// 5525 /// If InsertBefore is not null, this function will duplicate (modified) 5526 /// insertvalues when a part of a nested struct is extracted. 5527 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, 5528 Instruction *InsertBefore) { 5529 // Nothing to index? Just return V then (this is useful at the end of our 5530 // recursion). 5531 if (idx_range.empty()) 5532 return V; 5533 // We have indices, so V should have an indexable type. 5534 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && 5535 "Not looking at a struct or array?"); 5536 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && 5537 "Invalid indices for type?"); 5538 5539 if (Constant *C = dyn_cast<Constant>(V)) { 5540 C = C->getAggregateElement(idx_range[0]); 5541 if (!C) return nullptr; 5542 return FindInsertedValue(C, idx_range.slice(1), InsertBefore); 5543 } 5544 5545 if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { 5546 // Loop the indices for the insertvalue instruction in parallel with the 5547 // requested indices 5548 const unsigned *req_idx = idx_range.begin(); 5549 for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); 5550 i != e; ++i, ++req_idx) { 5551 if (req_idx == idx_range.end()) { 5552 // We can't handle this without inserting insertvalues 5553 if (!InsertBefore) 5554 return nullptr; 5555 5556 // The requested index identifies a part of a nested aggregate. Handle 5557 // this specially. 
For example,
5558 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
5559 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
5560 // %C = extractvalue {i32, { i32, i32 } } %B, 1
5561 // This can be changed into
5562 // %A = insertvalue {i32, i32 } undef, i32 10, 0
5563 // %C = insertvalue {i32, i32 } %A, i32 11, 1
5564 // which allows the unused 0,0 element from the nested struct to be
5565 // removed.
5566 return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
5567 InsertBefore);
5568 }
5569
5570 // This insert value inserts something other than what we are looking for.
5571 // See if the (aggregate) value inserted into has the value we are
5572 // looking for, then.
5573 if (*req_idx != *i)
5574 return FindInsertedValue(I->getAggregateOperand(), idx_range,
5575 InsertBefore);
5576 }
5577 // If we end up here, the indices of the insertvalue match with those
5578 // requested (though possibly only partially). Now we recursively look at
5579 // the inserted value, passing any remaining indices.
5580 return FindInsertedValue(I->getInsertedValueOperand(),
5581 ArrayRef(req_idx, idx_range.end()), InsertBefore);
5582 }
5583
5584 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
5585 // If we're extracting a value from an aggregate that was extracted from
5586 // something else, we can extract from that something else directly instead.
5587 // However, we will need to chain I's indices with the requested indices.
5588
5589 // Calculate the number of indices required
5590 unsigned size = I->getNumIndices() + idx_range.size();
5591 // Allocate some space to put the new indices in
5592 SmallVector<unsigned, 5> Idxs;
5593 Idxs.reserve(size);
5594 // Add indices from the extract value instruction
5595 Idxs.append(I->idx_begin(), I->idx_end());
5596
5597 // Add requested indices
5598 Idxs.append(idx_range.begin(), idx_range.end());
5599
5600 assert(Idxs.size() == size
5601 && "Number of indices added not correct?");
5602
5603 return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
5604 }
5605 // Otherwise, we don't know (such as extracting from a function return value
5606 // or load instruction)
5607 return nullptr;
5608 }
5609
5610 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
5611 unsigned CharSize) {
5612 // Make sure the GEP has exactly three arguments.
5613 if (GEP->getNumOperands() != 3)
5614 return false;
5615
5616 // Make sure the index-ee is a pointer to an array of \p CharSize
5617 // integers.
5618 ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
5619 if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
5620 return false;
5621
5622 // Check to make sure that the first operand of the GEP is an integer and
5623 // has value 0 so that we are sure we're indexing into the initializer.
5624 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
5625 if (!FirstIdx || !FirstIdx->isZero())
5626 return false;
5627
5628 return true;
5629 }
5630
5631 // If V refers to an initialized global constant, set Slice either to
5632 // its initializer if the size of its elements equals ElementSize, or,
5633 // for ElementSize == 8, to its representation as an array of unsigned
5634 // char. Return true on success.
5635 // Offset is in units of ElementSize-sized elements.
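// Illustrative (hypothetical IR): for
//   @s = private constant [5 x i8] c"abcd\00"
// a query with ElementSize == 8 and Offset == 1 yields Slice.Offset == 1
// and Slice.Length == 4, i.e. the bytes "bcd\00".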
5636 bool llvm::getConstantDataArrayInfo(const Value *V, 5637 ConstantDataArraySlice &Slice, 5638 unsigned ElementSize, uint64_t Offset) { 5639 assert(V && "V should not be null."); 5640 assert((ElementSize % 8) == 0 && 5641 "ElementSize expected to be a multiple of the size of a byte."); 5642 unsigned ElementSizeInBytes = ElementSize / 8; 5643 5644 // Drill down into the pointer expression V, ignoring any intervening 5645 // casts, and determine the identity of the object it references along 5646 // with the cumulative byte offset into it. 5647 const GlobalVariable *GV = 5648 dyn_cast<GlobalVariable>(getUnderlyingObject(V)); 5649 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) 5650 // Fail if V is not based on constant global object. 5651 return false; 5652 5653 const DataLayout &DL = GV->getParent()->getDataLayout(); 5654 APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0); 5655 5656 if (GV != V->stripAndAccumulateConstantOffsets(DL, Off, 5657 /*AllowNonInbounds*/ true)) 5658 // Fail if a constant offset could not be determined. 5659 return false; 5660 5661 uint64_t StartIdx = Off.getLimitedValue(); 5662 if (StartIdx == UINT64_MAX) 5663 // Fail if the constant offset is excessive. 5664 return false; 5665 5666 // Off/StartIdx is in the unit of bytes. So we need to convert to number of 5667 // elements. Simply bail out if that isn't possible. 5668 if ((StartIdx % ElementSizeInBytes) != 0) 5669 return false; 5670 5671 Offset += StartIdx / ElementSizeInBytes; 5672 ConstantDataArray *Array = nullptr; 5673 ArrayType *ArrayTy = nullptr; 5674 5675 if (GV->getInitializer()->isNullValue()) { 5676 Type *GVTy = GV->getValueType(); 5677 uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue(); 5678 uint64_t Length = SizeInBytes / ElementSizeInBytes; 5679 5680 Slice.Array = nullptr; 5681 Slice.Offset = 0; 5682 // Return an empty Slice for undersized constants to let callers 5683 // transform even undefined library calls into simpler, well-defined 5684 // expressions. This is preferable to making the calls although it 5685 // prevents sanitizers from detecting such calls. 5686 Slice.Length = Length < Offset ? 0 : Length - Offset; 5687 return true; 5688 } 5689 5690 auto *Init = const_cast<Constant *>(GV->getInitializer()); 5691 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) { 5692 Type *InitElTy = ArrayInit->getElementType(); 5693 if (InitElTy->isIntegerTy(ElementSize)) { 5694 // If Init is an initializer for an array of the expected type 5695 // and size, use it as is. 5696 Array = ArrayInit; 5697 ArrayTy = ArrayInit->getType(); 5698 } 5699 } 5700 5701 if (!Array) { 5702 if (ElementSize != 8) 5703 // TODO: Handle conversions to larger integral types. 5704 return false; 5705 5706 // Otherwise extract the portion of the initializer starting 5707 // at Offset as an array of bytes, and reset Offset. 5708 Init = ReadByteArrayFromGlobal(GV, Offset); 5709 if (!Init) 5710 return false; 5711 5712 Offset = 0; 5713 Array = dyn_cast<ConstantDataArray>(Init); 5714 ArrayTy = dyn_cast<ArrayType>(Init->getType()); 5715 } 5716 5717 uint64_t NumElts = ArrayTy->getArrayNumElements(); 5718 if (Offset > NumElts) 5719 return false; 5720 5721 Slice.Array = Array; 5722 Slice.Offset = Offset; 5723 Slice.Length = NumElts - Offset; 5724 return true; 5725 } 5726 5727 /// Extract bytes from the initializer of the constant array V, which need 5728 /// not be a nul-terminated string. On success, store the bytes in Str and 5729 /// return true. 
When TrimAtNul is set, Str will contain only the bytes up 5730 /// to but not including the first nul. Return false on failure. 5731 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, 5732 bool TrimAtNul) { 5733 ConstantDataArraySlice Slice; 5734 if (!getConstantDataArrayInfo(V, Slice, 8)) 5735 return false; 5736 5737 if (Slice.Array == nullptr) { 5738 if (TrimAtNul) { 5739 // Return a nul-terminated string even for an empty Slice. This is 5740 // safe because all existing SimplifyLibcalls callers require string 5741 // arguments and the behavior of the functions they fold is undefined 5742 // otherwise. Folding the calls this way is preferable to making 5743 // the undefined library calls, even though it prevents sanitizers 5744 // from reporting such calls. 5745 Str = StringRef(); 5746 return true; 5747 } 5748 if (Slice.Length == 1) { 5749 Str = StringRef("", 1); 5750 return true; 5751 } 5752 // We cannot instantiate a StringRef as we do not have an appropriate string 5753 // of 0s at hand. 5754 return false; 5755 } 5756 5757 // Start out with the entire array in the StringRef. 5758 Str = Slice.Array->getAsString(); 5759 // Skip over 'offset' bytes. 5760 Str = Str.substr(Slice.Offset); 5761 5762 if (TrimAtNul) { 5763 // Trim off the \0 and anything after it. If the array is not nul 5764 // terminated, we just return the whole end of string. The client may know 5765 // some other way that the string is length-bound. 5766 Str = Str.substr(0, Str.find('\0')); 5767 } 5768 return true; 5769 } 5770 5771 // These next two are very similar to the above, but also look through PHI 5772 // nodes. 5773 // TODO: See if we can integrate these two together. 5774 5775 /// If we can compute the length of the string pointed to by 5776 /// the specified pointer, return 'len+1'. If we can't, return 0. 5777 static uint64_t GetStringLengthH(const Value *V, 5778 SmallPtrSetImpl<const PHINode*> &PHIs, 5779 unsigned CharSize) { 5780 // Look through noop bitcast instructions. 5781 V = V->stripPointerCasts(); 5782 5783 // If this is a PHI node, there are two cases: either we have already seen it 5784 // or we haven't. 5785 if (const PHINode *PN = dyn_cast<PHINode>(V)) { 5786 if (!PHIs.insert(PN).second) 5787 return ~0ULL; // already in the set. 5788 5789 // If it was new, see if all the input strings are the same length. 5790 uint64_t LenSoFar = ~0ULL; 5791 for (Value *IncValue : PN->incoming_values()) { 5792 uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); 5793 if (Len == 0) return 0; // Unknown length -> unknown. 5794 5795 if (Len == ~0ULL) continue; 5796 5797 if (Len != LenSoFar && LenSoFar != ~0ULL) 5798 return 0; // Disagree -> unknown. 5799 LenSoFar = Len; 5800 } 5801 5802 // Success, all agree. 5803 return LenSoFar; 5804 } 5805 5806 // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) 5807 if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { 5808 uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); 5809 if (Len1 == 0) return 0; 5810 uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); 5811 if (Len2 == 0) return 0; 5812 if (Len1 == ~0ULL) return Len2; 5813 if (Len2 == ~0ULL) return Len1; 5814 if (Len1 != Len2) return 0; 5815 return Len1; 5816 } 5817 5818 // Otherwise, see if we can read the string. 5819 ConstantDataArraySlice Slice; 5820 if (!getConstantDataArrayInfo(V, Slice, CharSize)) 5821 return 0; 5822 5823 if (Slice.Array == nullptr) 5824 // Zeroinitializer (including an empty one). 5825 return 1; 5826 5827 // Search for the first nul character. 
Return a conservative result even
5828 // when there is no nul. This is safe since otherwise the string function
5829 // being folded, such as strlen, has undefined behavior, and folding can be
5830 // preferable to making the undefined library call.
5831 unsigned NullIndex = 0;
5832 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
5833 if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
5834 break;
5835 }
5836
5837 return NullIndex + 1;
5838 }
5839
5840 /// If we can compute the length of the string pointed to by
5841 /// the specified pointer, return 'len+1'. If we can't, return 0.
5842 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
5843 if (!V->getType()->isPointerTy())
5844 return 0;
5845
5846 SmallPtrSet<const PHINode*, 32> PHIs;
5847 uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
5848 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
5849 // the length of an empty string.
5850 return Len == ~0ULL ? 1 : Len;
5851 }
5852
5853 const Value *
5854 llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
5855 bool MustPreserveNullness) {
5856 assert(Call &&
5857 "getArgumentAliasingToReturnedPointer only works on nonnull calls");
5858 if (const Value *RV = Call->getReturnedArgOperand())
5859 return RV;
5860 // This can be used only as an aliasing property.
5861 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
5862 Call, MustPreserveNullness))
5863 return Call->getArgOperand(0);
5864 return nullptr;
5865 }
5866
5867 bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
5868 const CallBase *Call, bool MustPreserveNullness) {
5869 switch (Call->getIntrinsicID()) {
5870 case Intrinsic::launder_invariant_group:
5871 case Intrinsic::strip_invariant_group:
5872 case Intrinsic::aarch64_irg:
5873 case Intrinsic::aarch64_tagp:
5874 // The amdgcn_make_buffer_rsrc function does not alter the address of the
5875 // input pointer (and thus preserves null-ness for the purposes of escape
5876 // analysis, which is where the MustPreserveNullness flag comes in to play).
5877 // However, it will not necessarily map ptr addrspace(N) null to ptr
5878 // addrspace(8) null, aka the "null descriptor", which has "all loads return
5879 // 0, all stores are dropped" semantics. Given the context of this intrinsic
5880 // list, no one should be relying on such a strict interpretation of
5881 // MustPreserveNullness (and, at time of writing, they are not), but we
5882 // document this fact out of an abundance of caution.
5883 case Intrinsic::amdgcn_make_buffer_rsrc:
5884 return true;
5885 case Intrinsic::ptrmask:
5886 return !MustPreserveNullness;
5887 default:
5888 return false;
5889 }
5890 }
5891
5892 /// \p PN defines a loop-variant pointer to an object. Check if the
5893 /// previous iteration of the loop was referring to the same object as \p PN.
5894 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
5895 const LoopInfo *LI) {
5896 // Find the loop-defined value.
5897 Loop *L = LI->getLoopFor(PN->getParent());
5898 if (PN->getNumIncomingValues() != 2)
5899 return true;
5900
5901 // Find the value from the previous iteration.
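// With a header phi such as (hypothetical IR)
//   %p = phi ptr [ %p.init, %preheader ], [ %p.next, %latch ]
// the loop-carried operand %p.next is the value from the previous
// iteration.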
5902 auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
5903 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
5904 PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
5905 if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
5906 return true;
5907
5908 // If a new pointer is loaded in the loop, the pointer references a different
5909 // object in every iteration. E.g.:
5910 // for (i)
5911 // int *p = a[i];
5912 // ...
5913 if (auto *Load = dyn_cast<LoadInst>(PrevValue))
5914 if (!L->isLoopInvariant(Load->getPointerOperand()))
5915 return false;
5916 return true;
5917 }
5918
5919 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
5920 if (!V->getType()->isPointerTy())
5921 return V;
5922 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
5923 if (auto *GEP = dyn_cast<GEPOperator>(V)) {
5924 V = GEP->getPointerOperand();
5925 } else if (Operator::getOpcode(V) == Instruction::BitCast ||
5926 Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
5927 V = cast<Operator>(V)->getOperand(0);
5928 if (!V->getType()->isPointerTy())
5929 return V;
5930 } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
5931 if (GA->isInterposable())
5932 return V;
5933 V = GA->getAliasee();
5934 } else {
5935 if (auto *PHI = dyn_cast<PHINode>(V)) {
5936 // Look through single-arg phi nodes created by LCSSA.
5937 if (PHI->getNumIncomingValues() == 1) {
5938 V = PHI->getIncomingValue(0);
5939 continue;
5940 }
5941 } else if (auto *Call = dyn_cast<CallBase>(V)) {
5942 // CaptureTracking knows about special capturing properties of some
5943 // intrinsics, like launder.invariant.group, that can't be expressed with
5944 // attributes but that have properties like returning an aliasing pointer.
5945 // Because some analyses may assume that a nocapture pointer is not
5946 // returned from a special intrinsic (the function would otherwise have to
5947 // be marked with the returned attribute), it is crucial to use this
5948 // function, as it must stay in sync with CaptureTracking. Not using it
5949 // may cause weird miscompilations where two aliasing pointers are
5950 // assumed not to alias.
5951 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
5952 V = RP;
5953 continue;
5954 }
5955 }
5956
5957 return V;
5958 }
5959 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
5960 }
5961 return V;
5962 }
5963
5964 void llvm::getUnderlyingObjects(const Value *V,
5965 SmallVectorImpl<const Value *> &Objects,
5966 LoopInfo *LI, unsigned MaxLookup) {
5967 SmallPtrSet<const Value *, 4> Visited;
5968 SmallVector<const Value *, 4> Worklist;
5969 Worklist.push_back(V);
5970 do {
5971 const Value *P = Worklist.pop_back_val();
5972 P = getUnderlyingObject(P, MaxLookup);
5973
5974 if (!Visited.insert(P).second)
5975 continue;
5976
5977 if (auto *SI = dyn_cast<SelectInst>(P)) {
5978 Worklist.push_back(SI->getTrueValue());
5979 Worklist.push_back(SI->getFalseValue());
5980 continue;
5981 }
5982
5983 if (auto *PN = dyn_cast<PHINode>(P)) {
5984 // If this PHI changes the underlying object in every iteration of the
5985 // loop, don't look through it. Consider:
5986 // int **A;
5987 // for (i) {
5988 // Prev = Curr; // Prev = PHI (Prev_0, Curr)
5989 // Curr = A[i];
5990 // *Prev, *Curr;
5991 //
5992 // Prev is tracking Curr one iteration behind so they refer to different
5993 // underlying objects.
5994 if (!LI || !LI->isLoopHeader(PN->getParent()) ||
5995 isSameUnderlyingObjectInLoop(PN, LI))
5996 append_range(Worklist, PN->incoming_values());
5997 continue;
5998 }
5999
6000 Objects.push_back(P);
6001 } while (!Worklist.empty());
6002 }
6003
6004 /// This is the function that does the work of looking through basic
6005 /// ptrtoint+arithmetic+inttoptr sequences.
6006 static const Value *getUnderlyingObjectFromInt(const Value *V) {
6007 do {
6008 if (const Operator *U = dyn_cast<Operator>(V)) {
6009 // If we find a ptrtoint, we can transfer control back to the
6010 // regular getUnderlyingObjects.
6011 if (U->getOpcode() == Instruction::PtrToInt)
6012 return U->getOperand(0);
6013 // If we find an add of a constant, a multiplied value, or a phi, it's
6014 // likely that the other operand will lead us to the base
6015 // object. We don't have to worry about the case where the
6016 // object address is somehow being computed by the multiply,
6017 // because our callers only care when the result is an
6018 // identifiable object.
6019 if (U->getOpcode() != Instruction::Add ||
6020 (!isa<ConstantInt>(U->getOperand(1)) &&
6021 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
6022 !isa<PHINode>(U->getOperand(1))))
6023 return V;
6024 V = U->getOperand(0);
6025 } else {
6026 return V;
6027 }
6028 assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
6029 } while (true);
6030 }
6031
6032 /// This is a wrapper around getUnderlyingObjects and adds support for basic
6033 /// ptrtoint+arithmetic+inttoptr sequences.
6034 /// It returns false if an unidentified object is found by getUnderlyingObjects.
6035 bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
6036 SmallVectorImpl<Value *> &Objects) {
6037 SmallPtrSet<const Value *, 16> Visited;
6038 SmallVector<const Value *, 4> Working(1, V);
6039 do {
6040 V = Working.pop_back_val();
6041
6042 SmallVector<const Value *, 4> Objs;
6043 getUnderlyingObjects(V, Objs);
6044
6045 for (const Value *V : Objs) {
6046 if (!Visited.insert(V).second)
6047 continue;
6048 if (Operator::getOpcode(V) == Instruction::IntToPtr) {
6049 const Value *O =
6050 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
6051 if (O->getType()->isPointerTy()) {
6052 Working.push_back(O);
6053 continue;
6054 }
6055 }
6056 // If getUnderlyingObjects fails to find an identifiable object,
6057 // getUnderlyingObjectsForCodeGen also fails for safety.
6058 if (!isIdentifiedObject(V)) { 6059 Objects.clear(); 6060 return false; 6061 } 6062 Objects.push_back(const_cast<Value *>(V)); 6063 } 6064 } while (!Working.empty()); 6065 return true; 6066 } 6067 6068 AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) { 6069 AllocaInst *Result = nullptr; 6070 SmallPtrSet<Value *, 4> Visited; 6071 SmallVector<Value *, 4> Worklist; 6072 6073 auto AddWork = [&](Value *V) { 6074 if (Visited.insert(V).second) 6075 Worklist.push_back(V); 6076 }; 6077 6078 AddWork(V); 6079 do { 6080 V = Worklist.pop_back_val(); 6081 assert(Visited.count(V)); 6082 6083 if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 6084 if (Result && Result != AI) 6085 return nullptr; 6086 Result = AI; 6087 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 6088 AddWork(CI->getOperand(0)); 6089 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 6090 for (Value *IncValue : PN->incoming_values()) 6091 AddWork(IncValue); 6092 } else if (auto *SI = dyn_cast<SelectInst>(V)) { 6093 AddWork(SI->getTrueValue()); 6094 AddWork(SI->getFalseValue()); 6095 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { 6096 if (OffsetZero && !GEP->hasAllZeroIndices()) 6097 return nullptr; 6098 AddWork(GEP->getPointerOperand()); 6099 } else if (CallBase *CB = dyn_cast<CallBase>(V)) { 6100 Value *Returned = CB->getReturnedArgOperand(); 6101 if (Returned) 6102 AddWork(Returned); 6103 else 6104 return nullptr; 6105 } else { 6106 return nullptr; 6107 } 6108 } while (!Worklist.empty()); 6109 6110 return Result; 6111 } 6112 6113 static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6114 const Value *V, bool AllowLifetime, bool AllowDroppable) { 6115 for (const User *U : V->users()) { 6116 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); 6117 if (!II) 6118 return false; 6119 6120 if (AllowLifetime && II->isLifetimeStartOrEnd()) 6121 continue; 6122 6123 if (AllowDroppable && II->isDroppable()) 6124 continue; 6125 6126 return false; 6127 } 6128 return true; 6129 } 6130 6131 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { 6132 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6133 V, /* AllowLifetime */ true, /* AllowDroppable */ false); 6134 } 6135 bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) { 6136 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper( 6137 V, /* AllowLifetime */ true, /* AllowDroppable */ true); 6138 } 6139 6140 bool llvm::mustSuppressSpeculation(const LoadInst &LI) { 6141 if (!LI.isUnordered()) 6142 return true; 6143 const Function &F = *LI.getFunction(); 6144 // Speculative load may create a race that did not exist in the source. 6145 return F.hasFnAttribute(Attribute::SanitizeThread) || 6146 // Speculative load may load data from dirty regions. 6147 F.hasFnAttribute(Attribute::SanitizeAddress) || 6148 F.hasFnAttribute(Attribute::SanitizeHWAddress); 6149 } 6150 6151 bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, 6152 const Instruction *CtxI, 6153 AssumptionCache *AC, 6154 const DominatorTree *DT, 6155 const TargetLibraryInfo *TLI) { 6156 return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, 6157 AC, DT, TLI); 6158 } 6159 6160 bool llvm::isSafeToSpeculativelyExecuteWithOpcode( 6161 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI, 6162 AssumptionCache *AC, const DominatorTree *DT, 6163 const TargetLibraryInfo *TLI) { 6164 #ifndef NDEBUG 6165 if (Inst->getOpcode() != Opcode) { 6166 // Check that the operands are actually compatible with the Opcode override. 
6167 auto hasEqualReturnAndLeadingOperandTypes =
6168 [](const Instruction *Inst, unsigned NumLeadingOperands) {
6169 if (Inst->getNumOperands() < NumLeadingOperands)
6170 return false;
6171 const Type *ExpectedType = Inst->getType();
6172 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
6173 if (Inst->getOperand(ItOp)->getType() != ExpectedType)
6174 return false;
6175 return true;
6176 };
6177 assert(!Instruction::isBinaryOp(Opcode) ||
6178 hasEqualReturnAndLeadingOperandTypes(Inst, 2));
6179 assert(!Instruction::isUnaryOp(Opcode) ||
6180 hasEqualReturnAndLeadingOperandTypes(Inst, 1));
6181 }
6182 #endif
6183
6184 switch (Opcode) {
6185 default:
6186 return true;
6187 case Instruction::UDiv:
6188 case Instruction::URem: {
6189 // x / y is undefined if y == 0.
6190 const APInt *V;
6191 if (match(Inst->getOperand(1), m_APInt(V)))
6192 return *V != 0;
6193 return false;
6194 }
6195 case Instruction::SDiv:
6196 case Instruction::SRem: {
6197 // x / y is undefined if y == 0 or x == INT_MIN and y == -1
6198 const APInt *Numerator, *Denominator;
6199 if (!match(Inst->getOperand(1), m_APInt(Denominator)))
6200 return false;
6201 // We cannot hoist this division if the denominator is 0.
6202 if (*Denominator == 0)
6203 return false;
6204 // It's safe to hoist if the denominator is not 0 or -1.
6205 if (!Denominator->isAllOnes())
6206 return true;
6207 // At this point we know that the denominator is -1. It is safe to hoist as
6208 // long as we know that the numerator is not INT_MIN.
6209 if (match(Inst->getOperand(0), m_APInt(Numerator)))
6210 return !Numerator->isMinSignedValue();
6211 // The numerator *might* be MinSignedValue.
6212 return false;
6213 }
6214 case Instruction::Load: {
6215 const LoadInst *LI = dyn_cast<LoadInst>(Inst);
6216 if (!LI)
6217 return false;
6218 if (mustSuppressSpeculation(*LI))
6219 return false;
6220 const DataLayout &DL = LI->getModule()->getDataLayout();
6221 return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
6222 LI->getType(), LI->getAlign(), DL,
6223 CtxI, AC, DT, TLI);
6224 }
6225 case Instruction::Call: {
6226 auto *CI = dyn_cast<const CallInst>(Inst);
6227 if (!CI)
6228 return false;
6229 const Function *Callee = CI->getCalledFunction();
6230
6231 // The called function could have undefined behavior or side-effects, even
6232 // if marked readnone nounwind.
6233 return Callee && Callee->isSpeculatable();
6234 }
6235 case Instruction::VAArg:
6236 case Instruction::Alloca:
6237 case Instruction::Invoke:
6238 case Instruction::CallBr:
6239 case Instruction::PHI:
6240 case Instruction::Store:
6241 case Instruction::Ret:
6242 case Instruction::Br:
6243 case Instruction::IndirectBr:
6244 case Instruction::Switch:
6245 case Instruction::Unreachable:
6246 case Instruction::Fence:
6247 case Instruction::AtomicRMW:
6248 case Instruction::AtomicCmpXchg:
6249 case Instruction::LandingPad:
6250 case Instruction::Resume:
6251 case Instruction::CatchSwitch:
6252 case Instruction::CatchPad:
6253 case Instruction::CatchRet:
6254 case Instruction::CleanupPad:
6255 case Instruction::CleanupRet:
6256 return false; // Misc instructions which have effects
6257 }
6258 }
6259
6260 bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
6261 if (I.mayReadOrWriteMemory())
6262 // Memory dependency possible
6263 return true;
6264 if (!isSafeToSpeculativelyExecute(&I))
6265 // Can't move above a maythrow call or infinite loop. Or if an
6266 // inalloca alloca, above a stacksave call.
6267 return true;
6268 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
6269 // 1) Can't reorder two inf-loop calls, even if readonly
6270 // 2) Also can't reorder an inf-loop call below an instruction which isn't
6271 // safe to speculatively execute. (Inverse of above)
6272 return true;
6273 return false;
6274 }
6275
6276 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
6277 static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
6278 switch (OR) {
6279 case ConstantRange::OverflowResult::MayOverflow:
6280 return OverflowResult::MayOverflow;
6281 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
6282 return OverflowResult::AlwaysOverflowsLow;
6283 case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
6284 return OverflowResult::AlwaysOverflowsHigh;
6285 case ConstantRange::OverflowResult::NeverOverflows:
6286 return OverflowResult::NeverOverflows;
6287 }
6288 llvm_unreachable("Unknown OverflowResult");
6289 }
6290
6291 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
6292 ConstantRange
6293 llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
6294 bool ForSigned,
6295 const SimplifyQuery &SQ) {
6296 ConstantRange CR1 =
6297 ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
6298 ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
6299 ConstantRange::PreferredRangeType RangeType =
6300 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
6301 return CR1.intersectWith(CR2, RangeType);
6302 }
6303
6304 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
6305 const Value *RHS,
6306 const SimplifyQuery &SQ) {
6307 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
6308 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
6309 ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
6310 ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
6311 return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
6312 }
6313
6314 OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
6315 const Value *RHS,
6316 const SimplifyQuery &SQ) {
6317 // Multiplying n * m significant bits yields a result of n + m significant
6318 // bits. If the total number of significant bits does not exceed the
6319 // result bit width (minus 1), there is no overflow.
6320 // This means if we have enough leading sign bits in the operands
6321 // we can guarantee that the result does not overflow.
6322 // Ref: "Hacker's Delight" by Henry Warren
6323 unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
6324
6325 // Note that underestimating the number of sign bits gives a more
6326 // conservative answer.
6327 unsigned SignBits =
6328 ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ);
6329
6330 // First handle the easy case: if we have enough sign bits there's
6331 // definitely no overflow.
6332 if (SignBits > BitWidth + 1)
6333 return OverflowResult::NeverOverflows;
6334
6335 // There are two ambiguous cases where there can be no overflow:
6336 // SignBits == BitWidth + 1 and
6337 // SignBits == BitWidth
6338 // The second case is difficult to check, therefore we only handle the
6339 // first case.
6340 if (SignBits == BitWidth + 1) {
6341 // It overflows only when both arguments are negative and the true
6342 // product is exactly the minimum negative number.
6343 // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
6344 // For simplicity we just check if at least one side is not negative.
6345 KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ);
6346 KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ);
6347 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
6348 return OverflowResult::NeverOverflows;
6349 }
6350 return OverflowResult::MayOverflow;
6351 }
6352
6353 OverflowResult
6354 llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
6355 const WithCache<const Value *> &RHS,
6356 const SimplifyQuery &SQ) {
6357 ConstantRange LHSRange =
6358 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
6359 ConstantRange RHSRange =
6360 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
6361 return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
6362 }
6363
6364 static OverflowResult
6365 computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
6366 const WithCache<const Value *> &RHS,
6367 const AddOperator *Add, const SimplifyQuery &SQ) {
6368 if (Add && Add->hasNoSignedWrap()) {
6369 return OverflowResult::NeverOverflows;
6370 }
6371
6372 // If LHS and RHS each have at least two sign bits, the addition will look
6373 // like
6374 //
6375 // XX..... +
6376 // YY.....
6377 //
6378 // If the carry into the most significant position is 0, X and Y can't both
6379 // be 1 and therefore the carry out of the addition is also 0.
6380 //
6381 // If the carry into the most significant position is 1, X and Y can't both
6382 // be 0 and therefore the carry out of the addition is also 1.
6383 //
6384 // Since the carry into the most significant position is always equal to
6385 // the carry out of the addition, there is no signed overflow.
6386 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
6387 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
6388 return OverflowResult::NeverOverflows;
6389
6390 ConstantRange LHSRange =
6391 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
6392 ConstantRange RHSRange =
6393 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
6394 OverflowResult OR =
6395 mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
6396 if (OR != OverflowResult::MayOverflow)
6397 return OR;
6398
6399 // The remaining code needs Add to be available. Return early if it is not.
6400 if (!Add)
6401 return OverflowResult::MayOverflow;
6402
6403 // If the sign of Add is the same as at least one of the operands, this add
6404 // CANNOT overflow. If this can be determined from the known bits of the
6405 // operands the above signedAddMayOverflow() check will have already done so.
6406 // The only other way to improve on the known bits is from an assumption, so
6407 // call computeKnownBitsFromContext() directly.
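// (Reminder of the textbook rule being applied here, not new logic: a
// signed add overflows only if both operands share a sign and the
// result's sign differs, so a result sign matching either operand rules
// out overflow.)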
6408 bool LHSOrRHSKnownNonNegative = 6409 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); 6410 bool LHSOrRHSKnownNegative = 6411 (LHSRange.isAllNegative() || RHSRange.isAllNegative()); 6412 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { 6413 KnownBits AddKnown(LHSRange.getBitWidth()); 6414 computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ); 6415 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || 6416 (AddKnown.isNegative() && LHSOrRHSKnownNegative)) 6417 return OverflowResult::NeverOverflows; 6418 } 6419 6420 return OverflowResult::MayOverflow; 6421 } 6422 6423 OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, 6424 const Value *RHS, 6425 const SimplifyQuery &SQ) { 6426 // X - (X % ?) 6427 // The remainder of a value can't have greater magnitude than itself, 6428 // so the subtraction can't overflow. 6429 6430 // X - (X -nuw ?) 6431 // In the minimal case, this would simplify to "?", so there's no subtract 6432 // at all. But if this analysis is used to peek through casts, for example, 6433 // then determining no-overflow may allow other transforms. 6434 6435 // TODO: There are other patterns like this. 6436 // See simplifyICmpWithBinOpOnLHS() for candidates. 6437 if (match(RHS, m_URem(m_Specific(LHS), m_Value())) || 6438 match(RHS, m_NUWSub(m_Specific(LHS), m_Value()))) 6439 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6440 return OverflowResult::NeverOverflows; 6441 6442 // Checking for conditions implied by dominating conditions may be expensive. 6443 // Limit it to usub_with_overflow calls for now. 6444 if (match(SQ.CxtI, 6445 m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) 6446 if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI, 6447 SQ.DL)) { 6448 if (*C) 6449 return OverflowResult::NeverOverflows; 6450 return OverflowResult::AlwaysOverflowsLow; 6451 } 6452 ConstantRange LHSRange = 6453 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); 6454 ConstantRange RHSRange = 6455 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); 6456 return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); 6457 } 6458 6459 OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, 6460 const Value *RHS, 6461 const SimplifyQuery &SQ) { 6462 // X - (X % ?) 6463 // The remainder of a value can't have greater magnitude than itself, 6464 // so the subtraction can't overflow. 6465 6466 // X - (X -nsw ?) 6467 // In the minimal case, this would simplify to "?", so there's no subtract 6468 // at all. But if this analysis is used to peek through casts, for example, 6469 // then determining no-overflow may allow other transforms. 6470 if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) || 6471 match(RHS, m_NSWSub(m_Specific(LHS), m_Value()))) 6472 if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) 6473 return OverflowResult::NeverOverflows; 6474 6475 // If LHS and RHS each have at least two sign bits, the subtraction 6476 // cannot overflow. 
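// (A short justification, not new logic: two sign bits confine each
// operand to [-2^(n-2), 2^(n-2) - 1], so the difference always lies in
// [-2^(n-1) + 1, 2^(n-1) - 1] and fits in n bits.)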
6477 if (::ComputeNumSignBits(LHS, 0, SQ) > 1 &&
6478 ::ComputeNumSignBits(RHS, 0, SQ) > 1)
6479 return OverflowResult::NeverOverflows;
6480
6481 ConstantRange LHSRange =
6482 computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
6483 ConstantRange RHSRange =
6484 computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
6485 return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
6486 }
6487
6488 bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
6489 const DominatorTree &DT) {
6490 SmallVector<const BranchInst *, 2> GuardingBranches;
6491 SmallVector<const ExtractValueInst *, 2> Results;
6492
6493 for (const User *U : WO->users()) {
6494 if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
6495 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
6496
6497 if (EVI->getIndices()[0] == 0)
6498 Results.push_back(EVI);
6499 else {
6500 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
6501
6502 for (const auto *U : EVI->users())
6503 if (const auto *B = dyn_cast<BranchInst>(U)) {
6504 assert(B->isConditional() && "How else is it using an i1?");
6505 GuardingBranches.push_back(B);
6506 }
6507 }
6508 } else {
6509 // We are using the aggregate directly in a way we don't want to analyze
6510 // here (storing it to a global, say).
6511 return false;
6512 }
6513 }
6514
6515 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
6516 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
6517 if (!NoWrapEdge.isSingleEdge())
6518 return false;
6519
6520 // Check if all users of the add are provably no-wrap.
6521 for (const auto *Result : Results) {
6522 // If the extractvalue itself is not executed on overflow, then we don't
6523 // need to check each use separately, since domination is transitive.
6524 if (DT.dominates(NoWrapEdge, Result->getParent()))
6525 continue;
6526
6527 for (const auto &RU : Result->uses())
6528 if (!DT.dominates(NoWrapEdge, RU))
6529 return false;
6530 }
6531
6532 return true;
6533 };
6534
6535 return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
6536 }
6537
6538 /// Shifts return poison if the shift amount is not smaller than the bitwidth.
6539 static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
6540 auto *C = dyn_cast<Constant>(ShiftAmount);
6541 if (!C)
6542 return false;
6543
6544 // Collect each constant shift amount so it can be checked against the bitwidth.
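// (Illustration only: "shl i8 %x, 9" is poison for every %x since
// 9 u>= 8, while a shift amount of 7 is always in range for i8.)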
6545 SmallVector<const Constant *, 4> ShiftAmounts; 6546 if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) { 6547 unsigned NumElts = FVTy->getNumElements(); 6548 for (unsigned i = 0; i < NumElts; ++i) 6549 ShiftAmounts.push_back(C->getAggregateElement(i)); 6550 } else if (isa<ScalableVectorType>(C->getType())) 6551 return false; // Can't tell, just return false to be safe 6552 else 6553 ShiftAmounts.push_back(C); 6554 6555 bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) { 6556 auto *CI = dyn_cast_or_null<ConstantInt>(C); 6557 return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth()); 6558 }); 6559 6560 return Safe; 6561 } 6562 6563 enum class UndefPoisonKind { 6564 PoisonOnly = (1 << 0), 6565 UndefOnly = (1 << 1), 6566 UndefOrPoison = PoisonOnly | UndefOnly, 6567 }; 6568 6569 static bool includesPoison(UndefPoisonKind Kind) { 6570 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0; 6571 } 6572 6573 static bool includesUndef(UndefPoisonKind Kind) { 6574 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0; 6575 } 6576 6577 static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, 6578 bool ConsiderFlagsAndMetadata) { 6579 6580 if (ConsiderFlagsAndMetadata && includesPoison(Kind) && 6581 Op->hasPoisonGeneratingFlagsOrMetadata()) 6582 return true; 6583 6584 unsigned Opcode = Op->getOpcode(); 6585 6586 // Check whether opcode is a poison/undef-generating operation 6587 switch (Opcode) { 6588 case Instruction::Shl: 6589 case Instruction::AShr: 6590 case Instruction::LShr: 6591 return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1)); 6592 case Instruction::FPToSI: 6593 case Instruction::FPToUI: 6594 // fptosi/ui yields poison if the resulting value does not fit in the 6595 // destination type. 6596 return true; 6597 case Instruction::Call: 6598 if (auto *II = dyn_cast<IntrinsicInst>(Op)) { 6599 switch (II->getIntrinsicID()) { 6600 // TODO: Add more intrinsics. 
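// For ctlz/cttz the second operand is the is_zero_poison immarg, and for
// abs it is is_int_min_poison; when that flag is zero the result is well
// defined for every input, which is what the check below relies on.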
6601 case Intrinsic::ctlz:
6602 case Intrinsic::cttz:
6603 case Intrinsic::abs:
6604 if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue())
6605 return false;
6606 break;
6607 case Intrinsic::ctpop:
6608 case Intrinsic::bswap:
6609 case Intrinsic::bitreverse:
6610 case Intrinsic::fshl:
6611 case Intrinsic::fshr:
6612 case Intrinsic::smax:
6613 case Intrinsic::smin:
6614 case Intrinsic::umax:
6615 case Intrinsic::umin:
6616 case Intrinsic::ptrmask:
6617 case Intrinsic::fptoui_sat:
6618 case Intrinsic::fptosi_sat:
6619 case Intrinsic::sadd_with_overflow:
6620 case Intrinsic::ssub_with_overflow:
6621 case Intrinsic::smul_with_overflow:
6622 case Intrinsic::uadd_with_overflow:
6623 case Intrinsic::usub_with_overflow:
6624 case Intrinsic::umul_with_overflow:
6625 case Intrinsic::sadd_sat:
6626 case Intrinsic::uadd_sat:
6627 case Intrinsic::ssub_sat:
6628 case Intrinsic::usub_sat:
6629 return false;
6630 case Intrinsic::sshl_sat:
6631 case Intrinsic::ushl_sat:
6632 return includesPoison(Kind) &&
6633 !shiftAmountKnownInRange(II->getArgOperand(1));
6634 case Intrinsic::fma:
6635 case Intrinsic::fmuladd:
6636 case Intrinsic::sqrt:
6637 case Intrinsic::powi:
6638 case Intrinsic::sin:
6639 case Intrinsic::cos:
6640 case Intrinsic::pow:
6641 case Intrinsic::log:
6642 case Intrinsic::log10:
6643 case Intrinsic::log2:
6644 case Intrinsic::exp:
6645 case Intrinsic::exp2:
6646 case Intrinsic::exp10:
6647 case Intrinsic::fabs:
6648 case Intrinsic::copysign:
6649 case Intrinsic::floor:
6650 case Intrinsic::ceil:
6651 case Intrinsic::trunc:
6652 case Intrinsic::rint:
6653 case Intrinsic::nearbyint:
6654 case Intrinsic::round:
6655 case Intrinsic::roundeven:
6656 case Intrinsic::fptrunc_round:
6657 case Intrinsic::canonicalize:
6658 case Intrinsic::arithmetic_fence:
6659 case Intrinsic::minnum:
6660 case Intrinsic::maxnum:
6661 case Intrinsic::minimum:
6662 case Intrinsic::maximum:
6663 case Intrinsic::is_fpclass:
6664 case Intrinsic::ldexp:
6665 case Intrinsic::frexp:
6666 return false;
6667 case Intrinsic::lround:
6668 case Intrinsic::llround:
6669 case Intrinsic::lrint:
6670 case Intrinsic::llrint:
6671 // If the value doesn't fit, an unspecified value is returned (but this
6672 // is not poison).
6673 return false;
6674 }
6675 }
6676 [[fallthrough]];
6677 case Instruction::CallBr:
6678 case Instruction::Invoke: {
6679 const auto *CB = cast<CallBase>(Op);
6680 return !CB->hasRetAttr(Attribute::NoUndef);
6681 }
6682 case Instruction::InsertElement:
6683 case Instruction::ExtractElement: {
6684 // If the index exceeds the length of the vector, the result is poison.
6685 auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
6686 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
6687 auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
6688 if (includesPoison(Kind))
6689 return !Idx ||
6690 Idx->getValue().uge(VTy->getElementCount().getKnownMinValue());
6691 return false;
6692 }
6693 case Instruction::ShuffleVector: {
6694 ArrayRef<int> Mask = isa<ConstantExpr>(Op)
6695 ?
cast<ConstantExpr>(Op)->getShuffleMask() 6696 : cast<ShuffleVectorInst>(Op)->getShuffleMask(); 6697 return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem); 6698 } 6699 case Instruction::FNeg: 6700 case Instruction::PHI: 6701 case Instruction::Select: 6702 case Instruction::URem: 6703 case Instruction::SRem: 6704 case Instruction::ExtractValue: 6705 case Instruction::InsertValue: 6706 case Instruction::Freeze: 6707 case Instruction::ICmp: 6708 case Instruction::FCmp: 6709 case Instruction::FAdd: 6710 case Instruction::FSub: 6711 case Instruction::FMul: 6712 case Instruction::FDiv: 6713 case Instruction::FRem: 6714 return false; 6715 case Instruction::GetElementPtr: 6716 // inbounds is handled above 6717 // TODO: what about inrange on constexpr? 6718 return false; 6719 default: { 6720 const auto *CE = dyn_cast<ConstantExpr>(Op); 6721 if (isa<CastInst>(Op) || (CE && CE->isCast())) 6722 return false; 6723 else if (Instruction::isBinaryOp(Opcode)) 6724 return false; 6725 // Be conservative and return true. 6726 return true; 6727 } 6728 } 6729 } 6730 6731 bool llvm::canCreateUndefOrPoison(const Operator *Op, 6732 bool ConsiderFlagsAndMetadata) { 6733 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison, 6734 ConsiderFlagsAndMetadata); 6735 } 6736 6737 bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) { 6738 return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly, 6739 ConsiderFlagsAndMetadata); 6740 } 6741 6742 static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V, 6743 unsigned Depth) { 6744 if (ValAssumedPoison == V) 6745 return true; 6746 6747 const unsigned MaxDepth = 2; 6748 if (Depth >= MaxDepth) 6749 return false; 6750 6751 if (const auto *I = dyn_cast<Instruction>(V)) { 6752 if (any_of(I->operands(), [=](const Use &Op) { 6753 return propagatesPoison(Op) && 6754 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1); 6755 })) 6756 return true; 6757 6758 // V = extractvalue V0, idx 6759 // V2 = extractvalue V0, idx2 6760 // V0's elements are all poison or not. 
// (e.g., add_with_overflow)
6761 const WithOverflowInst *II;
6762 if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
6763 (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
6764 llvm::is_contained(II->args(), ValAssumedPoison)))
6765 return true;
6766 }
6767 return false;
6768 }
6769
6770 static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
6771 unsigned Depth) {
6772 if (isGuaranteedNotToBePoison(ValAssumedPoison))
6773 return true;
6774
6775 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
6776 return true;
6777
6778 const unsigned MaxDepth = 2;
6779 if (Depth >= MaxDepth)
6780 return false;
6781
6782 const auto *I = dyn_cast<Instruction>(ValAssumedPoison);
6783 if (I && !canCreatePoison(cast<Operator>(I))) {
6784 return all_of(I->operands(), [=](const Value *Op) {
6785 return impliesPoison(Op, V, Depth + 1);
6786 });
6787 }
6788 return false;
6789 }
6790
6791 bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
6792 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
6793 }
6794
6795 static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
6796
6797 static bool isGuaranteedNotToBeUndefOrPoison(
6798 const Value *V, AssumptionCache *AC, const Instruction *CtxI,
6799 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
6800 if (Depth >= MaxAnalysisRecursionDepth)
6801 return false;
6802
6803 if (isa<MetadataAsValue>(V))
6804 return false;
6805
6806 if (const auto *A = dyn_cast<Argument>(V)) {
6807 if (A->hasAttribute(Attribute::NoUndef) ||
6808 A->hasAttribute(Attribute::Dereferenceable) ||
6809 A->hasAttribute(Attribute::DereferenceableOrNull))
6810 return true;
6811 }
6812
6813 if (auto *C = dyn_cast<Constant>(V)) {
6814 if (isa<PoisonValue>(C))
6815 return !includesPoison(Kind);
6816
6817 if (isa<UndefValue>(C))
6818 return !includesUndef(Kind);
6819
6820 if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(V) ||
6821 isa<ConstantPointerNull>(C) || isa<Function>(C))
6822 return true;
6823
6824 if (C->getType()->isVectorTy() && !isa<ConstantExpr>(C))
6825 return (!includesUndef(Kind) ? !C->containsPoisonElement()
6826 : !C->containsUndefOrPoisonElement()) &&
6827 !C->containsConstantExpression();
6828 }
6829
6830 // Strip cast operations from a pointer value.
6831 // Note that stripPointerCastsSameRepresentation can strip off getelementptr
6832 // inbounds with zero offset. To guarantee that the result isn't poison, the
6833 // stripped pointer is checked: it has to point into an allocated object or
6834 // be null, so that `inbounds` GEPs with a zero offset cannot produce
6835 // poison.
6836 // It can also strip off addrspacecasts that do not change the bit
6837 // representation; we treat such an addrspacecast as a no-op.
6838 auto *StrippedV = V->stripPointerCastsSameRepresentation();
6839 if (isa<AllocaInst>(StrippedV) || isa<GlobalVariable>(StrippedV) ||
6840 isa<Function>(StrippedV) || isa<ConstantPointerNull>(StrippedV))
6841 return true;
6842
6843 auto OpCheck = [&](const Value *V) {
6844 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind);
6845 };
6846
6847 if (auto *Opr = dyn_cast<Operator>(V)) {
6848 // If the value is a freeze instruction, then it can never
6849 // be undef or poison.
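// (freeze is defined to replace an undef or poison input with an
// arbitrary but fixed well-defined value of the type.)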
6850 if (isa<FreezeInst>(V))
6851 return true;
6852
6853 if (const auto *CB = dyn_cast<CallBase>(V)) {
6854 if (CB->hasRetAttr(Attribute::NoUndef) ||
6855 CB->hasRetAttr(Attribute::Dereferenceable) ||
6856 CB->hasRetAttr(Attribute::DereferenceableOrNull))
6857 return true;
6858 }
6859
6860 if (const auto *PN = dyn_cast<PHINode>(V)) {
6861 unsigned Num = PN->getNumIncomingValues();
6862 bool IsWellDefined = true;
6863 for (unsigned i = 0; i < Num; ++i) {
6864 auto *TI = PN->getIncomingBlock(i)->getTerminator();
6865 if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
6866 DT, Depth + 1, Kind)) {
6867 IsWellDefined = false;
6868 break;
6869 }
6870 }
6871 if (IsWellDefined)
6872 return true;
6873 } else if (!::canCreateUndefOrPoison(Opr, Kind,
6874 /*ConsiderFlagsAndMetadata*/ true) &&
6875 all_of(Opr->operands(), OpCheck))
6876 return true;
6877 }
6878
6879 if (auto *I = dyn_cast<LoadInst>(V))
6880 if (I->hasMetadata(LLVMContext::MD_noundef) ||
6881 I->hasMetadata(LLVMContext::MD_dereferenceable) ||
6882 I->hasMetadata(LLVMContext::MD_dereferenceable_or_null))
6883 return true;
6884
6885 if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind)))
6886 return true;
6887
6888 // CxtI may be null or a cloned instruction.
6889 if (!CtxI || !CtxI->getParent() || !DT)
6890 return false;
6891
6892 auto *DNode = DT->getNode(CtxI->getParent());
6893 if (!DNode)
6894 // Unreachable block
6895 return false;
6896
6897 // If V is used as a branch condition before reaching CtxI, V cannot be
6898 // undef or poison.
6899 // br V, BB1, BB2
6900 // BB1:
6901 // CtxI ; V cannot be undef or poison here
6902 auto *Dominator = DNode->getIDom();
6903 while (Dominator) {
6904 auto *TI = Dominator->getBlock()->getTerminator();
6905
6906 Value *Cond = nullptr;
6907 if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
6908 if (BI->isConditional())
6909 Cond = BI->getCondition();
6910 } else if (auto SI = dyn_cast_or_null<SwitchInst>(TI)) {
6911 Cond = SI->getCondition();
6912 }
6913
6914 if (Cond) {
6915 if (Cond == V)
6916 return true;
6917 else if (!includesUndef(Kind) && isa<Operator>(Cond)) {
6918 // For poison, we can analyze further
6919 auto *Opr = cast<Operator>(Cond);
6920 if (any_of(Opr->operands(),
6921 [V](const Use &U) { return V == U && propagatesPoison(U); }))
6922 return true;
6923 }
6924 }
6925
6926 Dominator = Dominator->getIDom();
6927 }
6928
6929 if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
6930 return true;
6931
6932 return false;
6933 }
6934
6935 bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
6936 const Instruction *CtxI,
6937 const DominatorTree *DT,
6938 unsigned Depth) {
6939 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
6940 UndefPoisonKind::UndefOrPoison);
6941 }
6942
6943 bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
6944 const Instruction *CtxI,
6945 const DominatorTree *DT, unsigned Depth) {
6946 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
6947 UndefPoisonKind::PoisonOnly);
6948 }
6949
6950 bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
6951 const Instruction *CtxI,
6952 const DominatorTree *DT, unsigned Depth) {
6953 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
6954 UndefPoisonKind::UndefOnly);
6955 }
6956
6957 /// Return true if undefined behavior would provably be executed on the path to
6958 /// OnPathTo if Root produced a poison result. Note that this doesn't say
6959 /// anything about whether OnPathTo is actually executed or whether Root is
6960 /// actually poison. This can be used to assess whether a new use of Root can
6961 /// be added at a location which is control equivalent with OnPathTo (such as
6962 /// immediately before it) without introducing UB which didn't previously
6963 /// exist. Note that a false result conveys no information.
6964 bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
6965 Instruction *OnPathTo,
6966 DominatorTree *DT) {
6967 // Basic approach is to assume Root is poison, propagate poison forward
6968 // through all users we can easily track, and then check whether any of those
6969 // users are provable UB and must execute before our exiting block might
6970 // exit.
6971
6972 // The set of all recursive users we've visited (which are assumed to all be
6973 // poison because of said visit).
6974 SmallSet<const Value *, 16> KnownPoison;
6975 SmallVector<const Instruction*, 16> Worklist;
6976 Worklist.push_back(Root);
6977 while (!Worklist.empty()) {
6978 const Instruction *I = Worklist.pop_back_val();
6979
6980 // If we know this must trigger UB on a path leading to our target.
6981 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
6982 return true;
6983
6984 // If we can't analyze propagation through this instruction, just skip it
6985 // and transitive users. Safe as false is a conservative result.
6986 if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
6987 return KnownPoison.contains(U) && propagatesPoison(U);
6988 }))
6989 continue;
6990
6991 if (KnownPoison.insert(I).second)
6992 for (const User *User : I->users())
6993 Worklist.push_back(cast<Instruction>(User));
6994 }
6995
6996 // Might be non-UB, or might have a path we couldn't prove must execute on
6997 // the way to the exiting bb.
6998 return false;
6999 }
7000
7001 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
7002 const SimplifyQuery &SQ) {
7003 return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
7004 Add, SQ);
7005 }
7006
7007 OverflowResult
7008 llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7009 const WithCache<const Value *> &RHS,
7010 const SimplifyQuery &SQ) {
7011 return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ);
7012 }
7013
7014 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7015 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7016 // of time because it's possible for another thread to interfere with it for an
7017 // arbitrary length of time, but programs aren't allowed to rely on that.
7018
7019 // If there is no successor, then execution can't transfer to it.
7020 if (isa<ReturnInst>(I))
7021 return false;
7022 if (isa<UnreachableInst>(I))
7023 return false;
7024
7025 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7026 // Instruction::willReturn.
7027 //
7028 // FIXME: Move this check into Instruction::willReturn.
7029 if (isa<CatchPadInst>(I)) {
7030 switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) {
7031 default:
7032 // A catchpad may invoke exception object constructors and such, which
7033 // in some languages can be arbitrary code, so be conservative by default.
7034 return false;
7035 case EHPersonality::CoreCLR:
7036 // For CoreCLR, it just involves a type test.
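// (A bare type test neither throws nor runs user code, so control is
// guaranteed to continue past the catchpad.)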
7037 return true;
7038 }
7039 }
7040
7041 // An instruction that returns without throwing must transfer control flow
7042 // to a successor.
7043 return !I->mayThrow() && I->willReturn();
7044 }
7045
7046 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
7047 // TODO: This is slightly conservative for invoke instructions, since exiting
7048 // via an exception *is* normal control flow for them.
7049 for (const Instruction &I : *BB)
7050 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7051 return false;
7052 return true;
7053 }
7054
7055 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7056 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
7057 unsigned ScanLimit) {
7058 return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
7059 ScanLimit);
7060 }
7061
7062 bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7063 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
7064 assert(ScanLimit && "scan limit must be non-zero");
7065 for (const Instruction &I : Range) {
7066 if (isa<DbgInfoIntrinsic>(I))
7067 continue;
7068 if (--ScanLimit == 0)
7069 return false;
7070 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7071 return false;
7072 }
7073 return true;
7074 }
7075
7076 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
7077 const Loop *L) {
7078 // The loop header is guaranteed to be executed for every iteration.
7079 //
7080 // FIXME: Relax this constraint to cover all basic blocks that are
7081 // guaranteed to be executed at every iteration.
7082 if (I->getParent() != L->getHeader()) return false;
7083
7084 for (const Instruction &LI : *L->getHeader()) {
7085 if (&LI == I) return true;
7086 if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
7087 }
7088 llvm_unreachable("Instruction not contained in its own parent basic block.");
7089 }
7090
7091 bool llvm::propagatesPoison(const Use &PoisonOp) {
7092 const Operator *I = cast<Operator>(PoisonOp.getUser());
7093 switch (I->getOpcode()) {
7094 case Instruction::Freeze:
7095 case Instruction::PHI:
7096 case Instruction::Invoke:
7097 return false;
7098 case Instruction::Select:
7099 return PoisonOp.getOperandNo() == 0;
7100 case Instruction::Call:
7101 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
7102 switch (II->getIntrinsicID()) {
7103 // TODO: Add more intrinsics.
7104 case Intrinsic::sadd_with_overflow:
7105 case Intrinsic::ssub_with_overflow:
7106 case Intrinsic::smul_with_overflow:
7107 case Intrinsic::uadd_with_overflow:
7108 case Intrinsic::usub_with_overflow:
7109 case Intrinsic::umul_with_overflow:
7110 // If an input is a vector containing a poison element, the
7111 // corresponding lanes of the two output vectors (calculated results
7112 // and overflow bits) are poison.
7113 return true;
7114 case Intrinsic::ctpop:
7115 return true;
7116 }
7117 }
7118 return false;
7119 case Instruction::ICmp:
7120 case Instruction::FCmp:
7121 case Instruction::GetElementPtr:
7122 return true;
7123 default:
7124 if (isa<BinaryOperator>(I) || isa<UnaryOperator>(I) || isa<CastInst>(I))
7125 return true;
7126
7127 // Be conservative and return false.
7128 return false;
7129 }
7130 }
7131
7132 void llvm::getGuaranteedWellDefinedOps(
7133 const Instruction *I, SmallVectorImpl<const Value *> &Operands) {
7134 switch (I->getOpcode()) {
7135 case Instruction::Store:
7136 Operands.push_back(cast<StoreInst>(I)->getPointerOperand());
7137 break;
7138
7139 case Instruction::Load:
7140 Operands.push_back(cast<LoadInst>(I)->getPointerOperand());
7141 break;
7142
7143 // Since the dereferenceable attribute implies noundef, atomic operations
7144 // also implicitly have noundef pointers.
7145 case Instruction::AtomicCmpXchg:
7146 Operands.push_back(cast<AtomicCmpXchgInst>(I)->getPointerOperand());
7147 break;
7148
7149 case Instruction::AtomicRMW:
7150 Operands.push_back(cast<AtomicRMWInst>(I)->getPointerOperand());
7151 break;
7152
7153 case Instruction::Call:
7154 case Instruction::Invoke: {
7155 const CallBase *CB = cast<CallBase>(I);
7156 if (CB->isIndirectCall())
7157 Operands.push_back(CB->getCalledOperand());
7158 for (unsigned i = 0; i < CB->arg_size(); ++i) {
7159 if (CB->paramHasAttr(i, Attribute::NoUndef) ||
7160 CB->paramHasAttr(i, Attribute::Dereferenceable) ||
7161 CB->paramHasAttr(i, Attribute::DereferenceableOrNull))
7162 Operands.push_back(CB->getArgOperand(i));
7163 }
7164 break;
7165 }
7166 case Instruction::Ret:
7167 if (I->getFunction()->hasRetAttribute(Attribute::NoUndef))
7168 Operands.push_back(I->getOperand(0));
7169 break;
7170 case Instruction::Switch:
7171 Operands.push_back(cast<SwitchInst>(I)->getCondition());
7172 break;
7173 case Instruction::Br: {
7174 auto *BR = cast<BranchInst>(I);
7175 if (BR->isConditional())
7176 Operands.push_back(BR->getCondition());
7177 break;
7178 }
7179 default:
7180 break;
7181 }
7182 }
7183
7184 void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
7185 SmallVectorImpl<const Value *> &Operands) {
7186 getGuaranteedWellDefinedOps(I, Operands);
7187 switch (I->getOpcode()) {
7188 // Divisors of these operations are allowed to be partially undef.
7189 case Instruction::UDiv:
7190 case Instruction::SDiv:
7191 case Instruction::URem:
7192 case Instruction::SRem:
7193 Operands.push_back(I->getOperand(1));
7194 break;
7195 default:
7196 break;
7197 }
7198 }
7199
7200 bool llvm::mustTriggerUB(const Instruction *I,
7201 const SmallPtrSetImpl<const Value *> &KnownPoison) {
7202 SmallVector<const Value *, 4> NonPoisonOps;
7203 getGuaranteedNonPoisonOps(I, NonPoisonOps);
7204
7205 for (const auto *V : NonPoisonOps)
7206 if (KnownPoison.count(V))
7207 return true;
7208
7209 return false;
7210 }
7211
7212 static bool programUndefinedIfUndefOrPoison(const Value *V,
7213 bool PoisonOnly) {
7214 // We currently only look for uses of values within the same basic
7215 // block, as that makes it easier to guarantee that the uses will be
7216 // executed given that Inst is executed.
7217 //
7218 // FIXME: Expand this to consider uses beyond the same basic block. To do
7219 // this, look out for the distinction between post-dominance and strong
7220 // post-dominance.
7221 const BasicBlock *BB = nullptr;
7222 BasicBlock::const_iterator Begin;
7223 if (const auto *Inst = dyn_cast<Instruction>(V)) {
7224 BB = Inst->getParent();
7225 Begin = Inst->getIterator();
7226 Begin++;
7227 } else if (const auto *Arg = dyn_cast<Argument>(V)) {
7228 if (Arg->getParent()->isDeclaration())
7229 return false;
7230 BB = &Arg->getParent()->getEntryBlock();
7231 Begin = BB->begin();
7232 } else {
7233 return false;
7234 }
7235
7236 // Limit number of instructions we look at, to avoid scanning through large
7237 // blocks. The current limit is chosen arbitrarily.
7238 unsigned ScanLimit = 32;
7239 BasicBlock::const_iterator End = BB->end();
7240
7241 if (!PoisonOnly) {
7242 // Since undef does not propagate eagerly, be conservative and just check
7243 // whether a value is directly passed to an instruction that must take
7244 // well-defined operands.
7245
7246 for (const auto &I : make_range(Begin, End)) {
7247 if (isa<DbgInfoIntrinsic>(I))
7248 continue;
7249 if (--ScanLimit == 0)
7250 break;
7251
7252 SmallVector<const Value *, 4> WellDefinedOps;
7253 getGuaranteedWellDefinedOps(&I, WellDefinedOps);
7254 if (is_contained(WellDefinedOps, V))
7255 return true;
7256
7257 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7258 break;
7259 }
7260 return false;
7261 }
7262
7263 // Set of instructions that we have proved will yield poison if Inst
7264 // does.
7265 SmallSet<const Value *, 16> YieldsPoison;
7266 SmallSet<const BasicBlock *, 4> Visited;
7267
7268 YieldsPoison.insert(V);
7269 Visited.insert(BB);
7270
7271 while (true) {
7272 for (const auto &I : make_range(Begin, End)) {
7273 if (isa<DbgInfoIntrinsic>(I))
7274 continue;
7275 if (--ScanLimit == 0)
7276 return false;
7277 if (mustTriggerUB(&I, YieldsPoison))
7278 return true;
7279 if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7280 return false;
7281
7282 // If an operand is poison and propagates it, mark I as yielding poison.
7283 for (const Use &Op : I.operands()) {
7284 if (YieldsPoison.count(Op) && propagatesPoison(Op)) {
7285 YieldsPoison.insert(&I);
7286 break;
7287 }
7288 }
7289
7290 // Special handling for select, which returns poison if its operand 0 is
7291 // poison (handled in the loop above) *or* if both its true/false operands
7292 // are poison (handled here).
7293 if (I.getOpcode() == Instruction::Select &&
7294 YieldsPoison.count(I.getOperand(1)) &&
7295 YieldsPoison.count(I.getOperand(2))) {
7296 YieldsPoison.insert(&I);
7297 }
7298 }
7299
7300 BB = BB->getSingleSuccessor();
7301 if (!BB || !Visited.insert(BB).second)
7302 break;
7303
7304 Begin = BB->getFirstNonPHI()->getIterator();
7305 End = BB->end();
7306 }
7307 return false;
7308 }
7309
7310 bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
7311 return ::programUndefinedIfUndefOrPoison(Inst, false);
7312 }
7313
7314 bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
7315 return ::programUndefinedIfUndefOrPoison(Inst, true);
7316 }
7317
7318 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
7319 if (FMF.noNaNs())
7320 return true;
7321
7322 if (auto *C = dyn_cast<ConstantFP>(V))
7323 return !C->isNaN();
7324
7325 if (auto *C = dyn_cast<ConstantDataVector>(V)) {
7326 if (!C->getElementType()->isFloatingPointTy())
7327 return false;
7328 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
7329 if (C->getElementAsAPFloat(I).isNaN())
7330 return false;
7331 }
7332 return true;
7333 }
7334
7335 if (isa<ConstantAggregateZero>(V))
7336 return true;
7337
7338 return false;
7339 }
7340
7341 static bool isKnownNonZero(const Value *V) {
7342 if (auto *C = dyn_cast<ConstantFP>(V))
7343 return !C->isZero();
7344
7345 if (auto *C = dyn_cast<ConstantDataVector>(V)) {
7346 if (!C->getElementType()->isFloatingPointTy())
7347 return false;
7348 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
7349 if (C->getElementAsAPFloat(I).isZero())
7350 return false;
7351 }
7352 return true;
7353 }
7354
7355 return false;
7356 }
7357
7358 /// Match clamp pattern for float types without caring about NaNs or signed zeros.
7359 /// Given a non-min/max outer cmp/select from the clamp pattern, this
7360 /// function recognizes if it can be substituted with a "canonical" min/max
7361 /// pattern.
7362 static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
7363 Value *CmpLHS, Value *CmpRHS,
7364 Value *TrueVal, Value *FalseVal,
7365 Value *&LHS, Value *&RHS) {
7366 // Try to match
7367 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
7368 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
7369 // and return description of the outer Max/Min.
7370
7371 // First, check if select has inverse order:
7372 if (CmpRHS == FalseVal) {
7373 std::swap(TrueVal, FalseVal);
7374 Pred = CmpInst::getInversePredicate(Pred);
7375 }
7376
7377 // Assume success now. If there's no match, callers should not use these anyway.
7378 LHS = TrueVal;
7379 RHS = FalseVal;
7380
7381 const APFloat *FC1;
7382 if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
7383 return {SPF_UNKNOWN, SPNB_NA, false};
7384
7385 const APFloat *FC2;
7386 switch (Pred) {
7387 case CmpInst::FCMP_OLT:
7388 case CmpInst::FCMP_OLE:
7389 case CmpInst::FCMP_ULT:
7390 case CmpInst::FCMP_ULE:
7391 if (match(FalseVal,
7392 m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
7393 m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
7394 *FC1 < *FC2)
7395 return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
7396 break;
7397 case CmpInst::FCMP_OGT:
7398 case CmpInst::FCMP_OGE:
7399 case CmpInst::FCMP_UGT:
7400 case CmpInst::FCMP_UGE:
7401 if (match(FalseVal,
7402 m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
7403 m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
7404 *FC1 > *FC2)
7405 return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
7406 break;
7407 default:
7408 break;
7409 }
7410
7411 return {SPF_UNKNOWN, SPNB_NA, false};
7412 }
7413
7414 /// Recognize variations of:
7415 /// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
7416 static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
7417 Value *CmpLHS, Value *CmpRHS,
7418 Value *TrueVal, Value *FalseVal) {
7419 // Swap the select operands and predicate to match the patterns below.
7420 if (CmpRHS != TrueVal) {
7421 Pred = ICmpInst::getSwappedPredicate(Pred);
7422 std::swap(TrueVal, FalseVal);
7423 }
7424 const APInt *C1;
7425 if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
7426 const APInt *C2;
7427 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
7428 if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
7429 C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
7430 return {SPF_SMAX, SPNB_NA, false};
7431
7432 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
7433 if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
7434 C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
7435 return {SPF_SMIN, SPNB_NA, false};
7436
7437 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
7438 if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
7439 C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
7440 return {SPF_UMAX, SPNB_NA, false};
7441
7442 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
7443 if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
7444 C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
7445 return {SPF_UMIN, SPNB_NA, false};
7446 }
7447 return {SPF_UNKNOWN, SPNB_NA, false};
7448 }
7449
7450 /// Recognize variations of:
7451 /// a < c ?
min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) 7452 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, 7453 Value *CmpLHS, Value *CmpRHS, 7454 Value *TVal, Value *FVal, 7455 unsigned Depth) { 7456 // TODO: Allow FP min/max with nnan/nsz. 7457 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); 7458 7459 Value *A = nullptr, *B = nullptr; 7460 SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); 7461 if (!SelectPatternResult::isMinOrMax(L.Flavor)) 7462 return {SPF_UNKNOWN, SPNB_NA, false}; 7463 7464 Value *C = nullptr, *D = nullptr; 7465 SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); 7466 if (L.Flavor != R.Flavor) 7467 return {SPF_UNKNOWN, SPNB_NA, false}; 7468 7469 // We have something like: x Pred y ? min(a, b) : min(c, d). 7470 // Try to match the compare to the min/max operations of the select operands. 7471 // First, make sure we have the right compare predicate. 7472 switch (L.Flavor) { 7473 case SPF_SMIN: 7474 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { 7475 Pred = ICmpInst::getSwappedPredicate(Pred); 7476 std::swap(CmpLHS, CmpRHS); 7477 } 7478 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) 7479 break; 7480 return {SPF_UNKNOWN, SPNB_NA, false}; 7481 case SPF_SMAX: 7482 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { 7483 Pred = ICmpInst::getSwappedPredicate(Pred); 7484 std::swap(CmpLHS, CmpRHS); 7485 } 7486 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) 7487 break; 7488 return {SPF_UNKNOWN, SPNB_NA, false}; 7489 case SPF_UMIN: 7490 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { 7491 Pred = ICmpInst::getSwappedPredicate(Pred); 7492 std::swap(CmpLHS, CmpRHS); 7493 } 7494 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) 7495 break; 7496 return {SPF_UNKNOWN, SPNB_NA, false}; 7497 case SPF_UMAX: 7498 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { 7499 Pred = ICmpInst::getSwappedPredicate(Pred); 7500 std::swap(CmpLHS, CmpRHS); 7501 } 7502 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) 7503 break; 7504 return {SPF_UNKNOWN, SPNB_NA, false}; 7505 default: 7506 return {SPF_UNKNOWN, SPNB_NA, false}; 7507 } 7508 7509 // If there is a common operand in the already matched min/max and the other 7510 // min/max operands match the compare operands (either directly or inverted), 7511 // then this is min/max of the same flavor. 7512 7513 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7514 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) 7515 if (D == B) { 7516 if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7517 match(A, m_Not(m_Specific(CmpRHS))))) 7518 return {L.Flavor, SPNB_NA, false}; 7519 } 7520 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7521 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) 7522 if (C == B) { 7523 if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7524 match(A, m_Not(m_Specific(CmpRHS))))) 7525 return {L.Flavor, SPNB_NA, false}; 7526 } 7527 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7528 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) 7529 if (D == A) { 7530 if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && 7531 match(B, m_Not(m_Specific(CmpRHS))))) 7532 return {L.Flavor, SPNB_NA, false}; 7533 } 7534 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7535 // ~d pred ~b ? 
m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) 7536 if (C == A) { 7537 if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && 7538 match(B, m_Not(m_Specific(CmpRHS))))) 7539 return {L.Flavor, SPNB_NA, false}; 7540 } 7541 7542 return {SPF_UNKNOWN, SPNB_NA, false}; 7543 } 7544 7545 /// If the input value is the result of a 'not' op, constant integer, or vector 7546 /// splat of a constant integer, return the bitwise-not source value. 7547 /// TODO: This could be extended to handle non-splat vector integer constants. 7548 static Value *getNotValue(Value *V) { 7549 Value *NotV; 7550 if (match(V, m_Not(m_Value(NotV)))) 7551 return NotV; 7552 7553 const APInt *C; 7554 if (match(V, m_APInt(C))) 7555 return ConstantInt::get(V->getType(), ~(*C)); 7556 7557 return nullptr; 7558 } 7559 7560 /// Match non-obvious integer minimum and maximum sequences. 7561 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, 7562 Value *CmpLHS, Value *CmpRHS, 7563 Value *TrueVal, Value *FalseVal, 7564 Value *&LHS, Value *&RHS, 7565 unsigned Depth) { 7566 // Assume success. If there's no match, callers should not use these anyway. 7567 LHS = TrueVal; 7568 RHS = FalseVal; 7569 7570 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); 7571 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7572 return SPR; 7573 7574 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); 7575 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) 7576 return SPR; 7577 7578 // Look through 'not' ops to find disguised min/max. 7579 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y) 7580 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y) 7581 if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) { 7582 switch (Pred) { 7583 case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false}; 7584 case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false}; 7585 case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false}; 7586 case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false}; 7587 default: break; 7588 } 7589 } 7590 7591 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X) 7592 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X) 7593 if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) { 7594 switch (Pred) { 7595 case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false}; 7596 case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false}; 7597 case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false}; 7598 case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false}; 7599 default: break; 7600 } 7601 } 7602 7603 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) 7604 return {SPF_UNKNOWN, SPNB_NA, false}; 7605 7606 const APInt *C1; 7607 if (!match(CmpRHS, m_APInt(C1))) 7608 return {SPF_UNKNOWN, SPNB_NA, false}; 7609 7610 // An unsigned min/max can be written with a signed compare. 7611 const APInt *C2; 7612 if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) || 7613 (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) { 7614 // Is the sign bit set? 7615 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX 7616 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN 7617 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue()) 7618 return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7619 7620 // Is the sign bit clear? 7621 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX 7622 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? 
X : MINVAL ==> UMIN 7623 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue()) 7624 return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false}; 7625 } 7626 7627 return {SPF_UNKNOWN, SPNB_NA, false}; 7628 } 7629 7630 bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW) { 7631 assert(X && Y && "Invalid operand"); 7632 7633 // X = sub (0, Y) || X = sub nsw (0, Y) 7634 if ((!NeedNSW && match(X, m_Sub(m_ZeroInt(), m_Specific(Y)))) || 7635 (NeedNSW && match(X, m_NSWSub(m_ZeroInt(), m_Specific(Y))))) 7636 return true; 7637 7638 // Y = sub (0, X) || Y = sub nsw (0, X) 7639 if ((!NeedNSW && match(Y, m_Sub(m_ZeroInt(), m_Specific(X)))) || 7640 (NeedNSW && match(Y, m_NSWSub(m_ZeroInt(), m_Specific(X))))) 7641 return true; 7642 7643 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A) 7644 Value *A, *B; 7645 return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) && 7646 match(Y, m_Sub(m_Specific(B), m_Specific(A))))) || 7647 (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) && 7648 match(Y, m_NSWSub(m_Specific(B), m_Specific(A))))); 7649 } 7650 7651 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, 7652 FastMathFlags FMF, 7653 Value *CmpLHS, Value *CmpRHS, 7654 Value *TrueVal, Value *FalseVal, 7655 Value *&LHS, Value *&RHS, 7656 unsigned Depth) { 7657 bool HasMismatchedZeros = false; 7658 if (CmpInst::isFPPredicate(Pred)) { 7659 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one 7660 // 0.0 operand, set the compare's 0.0 operands to that same value for the 7661 // purpose of identifying min/max. Disregard vector constants with undefined 7662 // elements because those can not be back-propagated for analysis. 7663 Value *OutputZeroVal = nullptr; 7664 if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) && 7665 !cast<Constant>(TrueVal)->containsUndefOrPoisonElement()) 7666 OutputZeroVal = TrueVal; 7667 else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) && 7668 !cast<Constant>(FalseVal)->containsUndefOrPoisonElement()) 7669 OutputZeroVal = FalseVal; 7670 7671 if (OutputZeroVal) { 7672 if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) { 7673 HasMismatchedZeros = true; 7674 CmpLHS = OutputZeroVal; 7675 } 7676 if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) { 7677 HasMismatchedZeros = true; 7678 CmpRHS = OutputZeroVal; 7679 } 7680 } 7681 } 7682 7683 LHS = CmpLHS; 7684 RHS = CmpRHS; 7685 7686 // Signed zero may return inconsistent results between implementations. 7687 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 7688 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) 7689 // Therefore, we behave conservatively and only proceed if at least one of the 7690 // operands is known to not be zero or if we don't care about signed zero. 7691 switch (Pred) { 7692 default: break; 7693 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT: 7694 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT: 7695 if (!HasMismatchedZeros) 7696 break; 7697 [[fallthrough]]; 7698 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: 7699 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: 7700 if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && 7701 !isKnownNonZero(CmpRHS)) 7702 return {SPF_UNKNOWN, SPNB_NA, false}; 7703 } 7704 7705 SelectPatternNaNBehavior NaNBehavior = SPNB_NA; 7706 bool Ordered = false; 7707 7708 // When given one NaN and one non-NaN input: 7709 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. 
  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
  //     ordered comparison fails), which could be NaN or non-NaN.
  // So here we discover exactly what NaN behavior is required/accepted.
  if (CmpInst::isFPPredicate(Pred)) {
    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);

    if (LHSSafe && RHSSafe) {
      // Both operands are known non-NaN.
      NaNBehavior = SPNB_RETURNS_ANY;
    } else if (CmpInst::isOrdered(Pred)) {
      // An ordered comparison will return false when given a NaN, so it
      // returns the RHS.
      Ordered = true;
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
        NaNBehavior = SPNB_RETURNS_NAN;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_OTHER;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    } else {
      Ordered = false;
      // An unordered comparison will return true when given a NaN, so it
      // returns the LHS.
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
        NaNBehavior = SPNB_RETURNS_OTHER;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_NAN;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    }
  }

  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (NaNBehavior == SPNB_RETURNS_NAN)
      NaNBehavior = SPNB_RETURNS_OTHER;
    else if (NaNBehavior == SPNB_RETURNS_OTHER)
      NaNBehavior = SPNB_RETURNS_NAN;
    Ordered = !Ordered;
  }

  // ([if]cmp X, Y) ? X : Y
  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
    switch (Pred) {
    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
    case ICmpInst::ICMP_SLT:
    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
    case FCmpInst::FCMP_UGT:
    case FCmpInst::FCMP_UGE:
    case FCmpInst::FCMP_OGT:
    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
    case FCmpInst::FCMP_ULT:
    case FCmpInst::FCMP_ULE:
    case FCmpInst::FCMP_OLT:
    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
    }
  }

  if (isKnownNegation(TrueVal, FalseVal)) {
    // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
    // match against either LHS or sext(LHS).
    auto MaybeSExtCmpLHS =
        m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
    auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
    auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
    if (match(TrueVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value
      // (-X >s 0), swap the return values because the negated value is
      // always 'RHS'.
      LHS = TrueVal;
      RHS = FalseVal;
      if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
      // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_ABS, SPNB_NA, false};

      // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};

      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_NABS, SPNB_NA, false};
    } else if (match(FalseVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value
      // (-X >s 0), swap the return values because the negated value is
      // always 'RHS'.
      LHS = FalseVal;
      RHS = TrueVal;
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_NABS, SPNB_NA, false};

      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};
    }
  }

  if (CmpInst::isIntPredicate(Pred))
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS,
                       Depth);

  // According to IEEE 754-2008 5.3.1, minNum(0.0, -0.0) and similar may return
  // either -0.0 or 0.0, so the fcmp/select pair has stricter semantics than
  // minNum. Be conservative in such a case.
  if (NaNBehavior != SPNB_RETURNS_ANY ||
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
       !isKnownNonZero(CmpRHS)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}

/// Helps to match a select pattern in case of a type mismatch.
///
/// The function handles the case when the type of the true and false values of
/// a select instruction differs from the type of the cmp instruction operands
/// because of a cast instruction. The function checks if it is legal to move
/// the cast operation after the "select". If yes, it returns the new second
/// value of the "select" (with the assumption that the cast is moved):
/// 1. As the operand of the cast instruction when both values of the "select"
///    are the same cast instruction.
/// 2. As the restored constant (by applying the reverse cast operation) when
///    the first value of the "select" is a cast operation and the second value
///    is a constant.
/// NOTE: We return only the new second value because the first value can be
/// accessed as the operand of the cast instruction.
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
                              Instruction::CastOps *CastOp) {
  auto *Cast1 = dyn_cast<CastInst>(V1);
  if (!Cast1)
    return nullptr;

  *CastOp = Cast1->getOpcode();
  Type *SrcTy = Cast1->getSrcTy();
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
    // If V1 and V2 are both the same cast from the same type, look through V1.
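    // For example (an illustrative IR sketch, not taken from a test):
    //   %t = sext i8 %a to i32
    //   %f = sext i8 %b to i32
    //   %s = select i1 %c, i32 %t, i32 %f
    // can be analyzed as a select of %a and %b in i8, with the sext applied
    // to the result afterwards.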
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    return nullptr;
  }

  auto *C = dyn_cast<Constant>(V2);
  if (!C)
    return nullptr;

  const DataLayout &DL = CmpI->getModule()->getDataLayout();
  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK C
      //
      // We can always move the trunc after the select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // upper bits after truncation. It can't be an abs pattern, because that
      // would look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only a min/max pattern can be matched. Such a match requires the
      // widened C to equal CmpConst; that is why we set the widened
      // C = CmpConst, and the condition trunc(CmpConst) == C is checked below.
      CastedTo = CmpConst;
    } else {
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
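  // For instance (illustrative), with *CastOp == ZExt from i8: a wide constant
  // C = 300 truncates to 44, and zext'ing 44 back yields 44 != 300, so the
  // round-trip check below rejects the transformation.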
  Constant *CastedBack =
      ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
  if (CastedBack && CastedBack != C)
    return nullptr;

  return CastedTo;
}

SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                             Instruction::CastOps *CastOp,
                                             unsigned Depth) {
  if (Depth >= MaxAnalysisRecursionDepth)
    return {SPF_UNKNOWN, SPNB_NA, false};

  SelectInst *SI = dyn_cast<SelectInst>(V);
  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};

  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};

  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();

  return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
                                            CastOp, Depth);
}

SelectPatternResult llvm::matchDecomposedSelectPattern(
    CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
    Instruction::CastOps *CastOp, unsigned Depth) {
  CmpInst::Predicate Pred = CmpI->getPredicate();
  Value *CmpLHS = CmpI->getOperand(0);
  Value *CmpRHS = CmpI->getOperand(1);
  FastMathFlags FMF;
  if (isa<FPMathOperator>(CmpI))
    FMF = CmpI->getFastMathFlags();

  // Bail out early.
  if (CmpI->isEquality())
    return {SPF_UNKNOWN, SPNB_NA, false};

  // Deal with type mismatches.
  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  cast<CastInst>(TrueVal)->getOperand(0), C,
                                  LHS, RHS, Depth);
    }
    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  C, cast<CastInst>(FalseVal)->getOperand(0),
                                  LHS, RHS, Depth);
    }
  }
  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                              LHS, RHS, Depth);
}

CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
  if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
  if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
  if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
  if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
  if (SPF == SPF_FMINNUM)
    return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
  if (SPF == SPF_FMAXNUM)
    return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  llvm_unreachable("unhandled!");
}

SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  if (SPF == SPF_SMIN) return SPF_SMAX;
  if (SPF == SPF_UMIN) return SPF_UMAX;
  if (SPF == SPF_SMAX) return SPF_SMIN;
  if (SPF == SPF_UMAX) return SPF_UMIN;
  llvm_unreachable("unhandled!");
}

Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
  switch (MinMaxID) {
  case Intrinsic::smax: return Intrinsic::smin;
  case Intrinsic::smin: return Intrinsic::smax;
  case Intrinsic::umax: return Intrinsic::umin;
  case Intrinsic::umin: return Intrinsic::umax;
  // Note that the next four intrinsics may produce the same result for the
  // original and the inverted case even if X != Y, because NaN is handled
  // specially.
  case Intrinsic::maximum: return Intrinsic::minimum;
  case Intrinsic::minimum: return Intrinsic::maximum;
  case Intrinsic::maxnum: return Intrinsic::minnum;
  case Intrinsic::minnum: return Intrinsic::maxnum;
  default: llvm_unreachable("Unexpected intrinsic");
  }
}

APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
  switch (SPF) {
  case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
  case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
  case SPF_UMAX: return APInt::getMaxValue(BitWidth);
  case SPF_UMIN: return APInt::getMinValue(BitWidth);
  default: llvm_unreachable("Unexpected flavor");
  }
}

std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible.
  // TODO: Support floating point min/max.
  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
            CurrentPattern.Flavor == SPF_FMINNUM ||
            CurrentPattern.Flavor == SPF_FMAXNUM ||
            !I->getType()->isIntOrIntVectorTy())
          return false;
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      })) {
    switch (SelectPattern.Flavor) {
    case SPF_SMIN:
      return {Intrinsic::smin, AllCmpSingleUse};
    case SPF_UMIN:
      return {Intrinsic::umin, AllCmpSingleUse};
    case SPF_SMAX:
      return {Intrinsic::smax, AllCmpSingleUse};
    case SPF_UMAX:
      return {Intrinsic::umax, AllCmpSingleUse};
    default:
      llvm_unreachable("unexpected select pattern flavor");
    }
  }
  return {Intrinsic::not_intrinsic, false};
}

bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                 Value *&Start, Value *&Step) {
  // Handle the case of a simple two-predecessor recurrence PHI.
  // There's a lot more that could theoretically be done here, but
  // this is sufficient to catch some interesting cases.
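  // A canonical example (illustrative IR, not from a test):
  //   loop:
  //     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  //     %iv.next = add i32 %iv, 1
  // matches with BO = %iv.next, Start = 0, Step = 1.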
  if (P->getNumIncomingValues() != 2)
    return false;

  for (unsigned i = 0; i != 2; ++i) {
    Value *L = P->getIncomingValue(i);
    Value *R = P->getIncomingValue(!i);
    auto *LU = dyn_cast<BinaryOperator>(L);
    if (!LU)
      continue;
    unsigned Opcode = LU->getOpcode();

    switch (Opcode) {
    default:
      continue;
    // TODO: Expand list -- xor, div, gep, uaddo, etc.
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Mul:
    case Instruction::FMul: {
      Value *LL = LU->getOperand(0);
      Value *LR = LU->getOperand(1);
      // Find a recurrence.
      if (LL == P)
        L = LR;
      else if (LR == P)
        L = LL;
      else
        continue; // Check for recurrence with L and R flipped.

      break; // Match!
    }
    }

    // We have matched a recurrence of the form:
    //   %iv = [R, %entry], [%iv.next, %backedge]
    //   %iv.next = binop %iv, L
    // OR
    //   %iv = [R, %entry], [%iv.next, %backedge]
    //   %iv.next = binop L, %iv
    BO = LU;
    Start = R;
    Step = L;
    return true;
  }
  return false;
}

bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
                                 Value *&Start, Value *&Step) {
  BinaryOperator *BO = nullptr;
  P = dyn_cast<PHINode>(I->getOperand(0));
  if (!P)
    P = dyn_cast<PHINode>(I->getOperand(1));
  return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
}

/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                            const Value *RHS, const DataLayout &DL,
                            unsigned Depth) {
  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
    return true;

  switch (Pred) {
  default:
    return false;

  case CmpInst::ICMP_SLE: {
    const APInt *C;

    // LHS s<= LHS +_{nsw} C if C >= 0
    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
      return !C->isNegative();
    return false;
  }

  case CmpInst::ICMP_ULE: {
    // LHS u<= LHS +_{nuw} V for any V
    if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
        cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
      return true;

    // RHS >> V u<= RHS for any V
    if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
    auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
                                       const Value *&X, const APInt *&CA,
                                       const APInt *&CB) {
      if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
          match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
        return true;

      // If X & C == 0 then (X | C) == X +_{nuw} C
      if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
          match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
        KnownBits Known(CA->getBitWidth());
        computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr,
                         /*CxtI*/ nullptr, /*DT*/ nullptr);
        if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
          return true;
      }

      return false;
    };

    const Value *X;
    const APInt *CLHS, *CRHS;
    if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
      return CLHS->ule(*CRHS);

    return false;
  }
  }
}

/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true.
/// Otherwise, return std::nullopt.
static std::optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
                      const Value *ARHS, const Value *BLHS, const Value *BRHS,
                      const DataLayout &DL, unsigned Depth) {
  switch (Pred) {
  default:
    return std::nullopt;

  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
    if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
    if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
    if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) &&
        isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth))
      return true;
    return std::nullopt;
  }
}

/// Return true if the operands of two compares (expanded as "L0 pred L1" and
/// "R0 pred R1") match. AreSwappedOps is true when the operands match, but are
/// swapped.
static bool areMatchingOperands(const Value *L0, const Value *L1,
                                const Value *R0, const Value *R1,
                                bool &AreSwappedOps) {
  bool AreMatchingOps = (L0 == R0 && L1 == R1);
  AreSwappedOps = (L0 == R1 && L1 == R0);
  return AreMatchingOps || AreSwappedOps;
}

/// Return true if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is true.
/// Return false if "icmp1 LPred X, Y" implies "icmp2 RPred X, Y" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondMatchingOperands(CmpInst::Predicate LPred,
                              CmpInst::Predicate RPred, bool AreSwappedOps) {
  // Canonicalize the predicate as if the operands were not commuted.
  if (AreSwappedOps)
    RPred = ICmpInst::getSwappedPredicate(RPred);

  if (CmpInst::isImpliedTrueByMatchingCmp(LPred, RPred))
    return true;
  if (CmpInst::isImpliedFalseByMatchingCmp(LPred, RPred))
    return false;

  return std::nullopt;
}

/// Return true if "icmp LPred X, LC" implies "icmp RPred X, RC" is true.
/// Return false if "icmp LPred X, LC" implies "icmp RPred X, RC" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool> isImpliedCondCommonOperandWithConstants(
    CmpInst::Predicate LPred, const APInt &LC, CmpInst::Predicate RPred,
    const APInt &RC) {
  ConstantRange DomCR = ConstantRange::makeExactICmpRegion(LPred, LC);
  ConstantRange CR = ConstantRange::makeExactICmpRegion(RPred, RC);
  ConstantRange Intersection = DomCR.intersectWith(CR);
  ConstantRange Difference = DomCR.difference(CR);
  if (Intersection.isEmptySet())
    return false;
  if (Difference.isEmptySet())
    return true;
  return std::nullopt;
}

/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
/// is true. Return false if LHS implies RHS is false. Otherwise, return
/// std::nullopt if we can't infer anything.
static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
                                              CmpInst::Predicate RPred,
                                              const Value *R0, const Value *R1,
                                              const DataLayout &DL,
                                              bool LHSIsTrue, unsigned Depth) {
  Value *L0 = LHS->getOperand(0);
  Value *L1 = LHS->getOperand(1);

  // The rest of the logic assumes the LHS condition is true. If that's not the
  // case, invert the predicate to make it so.
  CmpInst::Predicate LPred =
      LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate();

  // Can we infer anything when the 0-operands match and the 1-operands are
  // constants (not necessarily matching)?
  const APInt *LC, *RC;
  if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC)))
    return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC);

  // Can we infer anything when the two compares have matching operands?
  bool AreSwappedOps;
  if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps))
    return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps);

  // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
  if (ICmpInst::isUnsigned(LPred) && ICmpInst::isUnsigned(RPred)) {
    if (L0 == R1) {
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedPredicate(RPred);
    }
    if (L1 == R0) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedPredicate(LPred);
    }
    if (L1 == R1) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedPredicate(LPred);
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedPredicate(RPred);
    }
    if (L0 == R0 &&
        (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
        (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
        match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
      return LPred == RPred;
  }

  if (LPred == RPred)
    return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth);

  return std::nullopt;
}

/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
/// false. Otherwise, return std::nullopt if we can't infer anything. We
/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
/// instruction.
static std::optional<bool>
isImpliedCondAndOr(const Instruction *LHS, CmpInst::Predicate RHSPred,
                   const Value *RHSOp0, const Value *RHSOp1,
                   const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // The LHS must be an 'or', 'and', or a 'select' instruction.
  assert((LHS->getOpcode() == Instruction::And ||
          LHS->getOpcode() == Instruction::Or ||
          LHS->getOpcode() == Instruction::Select) &&
         "Expected LHS to be 'and', 'or', or 'select'.");

  assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");

  // If the result of an 'or' is false, then we know both legs of the 'or' are
  // false. Similarly, if the result of an 'and' is true, then we know both
  // legs of the 'and' are true.
  const Value *ALHS, *ARHS;
  if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
      (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
    // FIXME: Make this non-recursive.
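    // E.g. (illustrative): if '%a && %b' is known true, then both %a and %b
    // are true, so an implication proven from either leg alone settles the
    // whole query; the dual holds for a false '||'.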
    if (std::optional<bool> Implication = isImpliedCondition(
            ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    if (std::optional<bool> Implication = isImpliedCondition(
            ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    return std::nullopt;
  }
  return std::nullopt;
}

std::optional<bool>
llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred,
                         const Value *RHSOp0, const Value *RHSOp1,
                         const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // Bail out when we hit the limit.
  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
  // example.
  if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
    return std::nullopt;

  assert(LHS->getType()->isIntOrIntVectorTy(1) &&
         "Expected integer type only!");

  // Both LHS and RHS are icmps.
  const ICmpInst *LHSCmp = dyn_cast<ICmpInst>(LHS);
  if (LHSCmp)
    return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
                              Depth);

  // The LHS should be an 'or', 'and', or a 'select' instruction. We expect
  // the RHS to be an icmp.
  // FIXME: Add support for and/or/select on the RHS.
  if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
    if ((LHSI->getOpcode() == Instruction::And ||
         LHSI->getOpcode() == Instruction::Or ||
         LHSI->getOpcode() == Instruction::Select))
      return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
                                Depth);
  }
  return std::nullopt;
}

std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
                                             const DataLayout &DL,
                                             bool LHSIsTrue, unsigned Depth) {
  // LHS ==> RHS by definition
  if (LHS == RHS)
    return LHSIsTrue;

  if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS))
    return isImpliedCondition(LHS, RHSCmp->getPredicate(),
                              RHSCmp->getOperand(0), RHSCmp->getOperand(1), DL,
                              LHSIsTrue, Depth);

  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
  // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
  const Value *RHS1, *RHS2;
  if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return true;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return true;
  }
  if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return false;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return false;
  }

  return std::nullopt;
}

// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
// condition dominating ContextI, or nullptr if no condition is found.
static std::pair<Value *, bool>
getDomPredecessorCondition(const Instruction *ContextI) {
  if (!ContextI || !ContextI->getParent())
    return {nullptr, false};

  // TODO: This is a poor/cheap way to determine dominance. Should we use a
  // dominator tree (e.g., from a SimplifyQuery) instead?
  const BasicBlock *ContextBB = ContextI->getParent();
  const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
  if (!PredBB)
    return {nullptr, false};

  // We need a conditional branch in the predecessor.
  Value *PredCond;
  BasicBlock *TrueBB, *FalseBB;
  if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
    return {nullptr, false};

  // The branch should get simplified. Don't bother simplifying this condition.
  if (TrueBB == FalseBB)
    return {nullptr, false};

  assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
         "Predecessor block does not point to successor?");

  // Is this condition implied by the predecessor condition?
  return {PredCond, TrueBB == ContextBB};
}

std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
  auto PredCond = getDomPredecessorCondition(ContextI);
  if (PredCond.first)
    return isImpliedCondition(PredCond.first, Cond, DL, PredCond.second);
  return std::nullopt;
}

std::optional<bool> llvm::isImpliedByDomCondition(CmpInst::Predicate Pred,
                                                  const Value *LHS,
                                                  const Value *RHS,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  auto PredCond = getDomPredecessorCondition(ContextI);
  if (PredCond.first)
    return isImpliedCondition(PredCond.first, Pred, LHS, RHS, DL,
                              PredCond.second);
  return std::nullopt;
}

static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
                              APInt &Upper, const InstrInfoQuery &IIQ,
                              bool PreferSignedRange) {
  unsigned Width = Lower.getBitWidth();
  const APInt *C;
  switch (BO.getOpcode()) {
  case Instruction::Add:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      bool HasNSW = IIQ.hasNoSignedWrap(&BO);
      bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);

      // If the caller expects a signed compare, then try to use a signed
      // range. Otherwise if both no-wraps are set, use the unsigned range
      // because it is never larger than the signed range. Example:
      // "add nuw nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
      if (PreferSignedRange && HasNSW && HasNUW)
        HasNUW = false;

      if (HasNUW) {
        // 'add nuw x, C' produces [C, UINT_MAX].
        Lower = *C;
      } else if (HasNSW) {
        if (C->isNegative()) {
          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
          Lower = APInt::getSignedMinValue(Width);
          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
        } else {
          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
          Lower = APInt::getSignedMinValue(Width) + *C;
          Upper = APInt::getSignedMaxValue(Width) + 1;
        }
      }
    }
    break;

  case Instruction::And:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'and x, C' produces [0, C].
      Upper = *C + 1;
    // X & -X is a power of two or zero, so we can cap the value at the max
    // power of two.
    if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
        match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
      Upper = APInt::getSignedMinValue(Width) + 1;
    break;

  case Instruction::Or:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'or x, C' produces [C, UINT_MAX].
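      // E.g. (illustrative), for i8: 'or i8 %x, 16' is always >= 16, since
      // bit 4 of the result is known set.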
      Lower = *C;
    break;

  case Instruction::AShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If the low bit is set, the value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant, the largest the result can be is when
        // the longest sequence of consecutive ones is shifted into the high
        // bits (breaking ties toward the higher sequence). At the moment we
        // take a liberal upper bound on this by just popcounting the constant.
        // TODO: There may be a bitwise trick to find the longest/highest
        // consecutive sequence of ones (the naive method is an O(Width) loop).
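        // E.g. (illustrative), for i8 with C = 0b00010110: the longest run of
        // ones is two bits, so the true maximum of 'C << x' is 0b11000000,
        // while popcount(C) = 3 gives the liberal bound
        // getHighBitsSet(8, 3) = 0b11100000 used below.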
        Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
      }
    } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
    }
    break;

  case Instruction::SDiv:
    if (match(BO.getOperand(1), m_APInt(C))) {
      APInt IntMin = APInt::getSignedMinValue(Width);
      APInt IntMax = APInt::getSignedMaxValue(Width);
      if (C->isAllOnes()) {
        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
        Lower = IntMin + 1;
        Upper = IntMax + 1;
      } else if (C->countl_zero() < Width - 1) {
        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin.sdiv(*C);
        Upper = IntMax.sdiv(*C);
        if (Lower.sgt(Upper))
          std::swap(Lower, Upper);
        Upper = Upper + 1;
        assert(Upper != Lower && "Upper part of range has wrapped!");
      }
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isMinSignedValue()) {
        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
        Lower = *C;
        Upper = Lower.lshr(1) + 1;
      } else {
        // 'sdiv C, x' produces [-|C|, |C|].
        Upper = C->abs() + 1;
        Lower = (-Upper) + 1;
      }
    }
    break;

  case Instruction::UDiv:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      // 'udiv x, C' produces [0, UINT_MAX / C].
      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'udiv C, x' produces [0, C].
      Upper = *C + 1;
    }
    break;

  case Instruction::SRem:
    if (match(BO.getOperand(1), m_APInt(C))) {
      // 'srem x, C' produces (-|C|, |C|).
      Upper = C->abs();
      Lower = (-Upper) + 1;
    }
    break;

  case Instruction::URem:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'urem x, C' produces [0, C).
      Upper = *C;
    break;

  default:
    break;
  }
}

static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) {
  unsigned Width = II.getType()->getScalarSizeInBits();
  const APInt *C;
  switch (II.getIntrinsicID()) {
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt(Width, Width + 1));
  case Intrinsic::uadd_sat:
    // uadd.sat(x, C) produces [C, UINT_MAX].
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    break;
  case Intrinsic::sadd_sat:
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          APInt::getSignedMaxValue(Width) +
                                              *C + 1);

      // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
                                        APInt::getSignedMaxValue(Width) + 1);
    }
    break;
  case Intrinsic::usub_sat:
    // usub.sat(C, x) produces [0, C].
    if (match(II.getOperand(0), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);

    // usub.sat(x, C) produces [0, UINT_MAX - C].
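    // E.g. (illustrative), for i8: usub.sat(x, 16) lies in [0, 239].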
    if (match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getMaxValue(Width) - *C + 1);
    break;
  case Intrinsic::ssub_sat:
    if (match(II.getOperand(0), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          *C - APInt::getSignedMinValue(Width) +
                                              1);

      // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
      return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
                                        APInt::getSignedMaxValue(Width) + 1);
    } else if (match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
                                          APInt::getSignedMaxValue(Width) + 1);

      // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        APInt::getSignedMaxValue(Width) - *C +
                                            1);
    }
    break;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
    if (!match(II.getOperand(0), m_APInt(C)) &&
        !match(II.getOperand(1), m_APInt(C)))
      break;

    switch (II.getIntrinsicID()) {
    case Intrinsic::umin:
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
    case Intrinsic::umax:
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    case Intrinsic::smin:
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        *C + 1);
    case Intrinsic::smax:
      return ConstantRange::getNonEmpty(*C,
                                        APInt::getSignedMaxValue(Width) + 1);
    default:
      llvm_unreachable("Must be min/max intrinsic");
    }
    break;
  case Intrinsic::abs:
    // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(II.getOperand(1), m_One()))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getSignedMaxValue(Width) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt::getSignedMinValue(Width) + 1);
  case Intrinsic::vscale:
    if (!II.getParent() || !II.getFunction())
      break;
    return getVScaleRange(II.getFunction(), Width);
  default:
    break;
  }

  return ConstantRange::getFull(Width);
}

static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
                                              const InstrInfoQuery &IIQ) {
  unsigned BitWidth = SI.getType()->getScalarSizeInBits();
  const Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
  if (R.Flavor == SPF_UNKNOWN)
    return ConstantRange::getFull(BitWidth);

  if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
    // If the negation part of the abs (in RHS) has the NSW flag,
    // then the result of abs(X) is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
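    // E.g. (illustrative), for i8: with nsw on the negation the range is
    // [0, 127]; without it, abs(-128) wraps back to -128, so -128 is also
    // possible.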
    if (match(RHS, m_Neg(m_Specific(LHS))) &&
        IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
      return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                        APInt::getSignedMaxValue(BitWidth) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                      APInt::getSignedMinValue(BitWidth) + 1);
  }

  if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
    // The result of -abs(X) is <= 0.
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      APInt(BitWidth, 1));
  }

  const APInt *C;
  if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
    return ConstantRange::getFull(BitWidth);

  switch (R.Flavor) {
  case SPF_UMIN:
    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
  case SPF_UMAX:
    return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
  case SPF_SMIN:
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      *C + 1);
  case SPF_SMAX:
    return ConstantRange::getNonEmpty(*C,
                                      APInt::getSignedMaxValue(BitWidth) + 1);
  default:
    return ConstantRange::getFull(BitWidth);
  }
}

static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
  // The maximum representable value of a half is 65504. For floats the maximum
  // value is 3.4e38, which requires roughly 129 bits.
  unsigned BitWidth = I->getType()->getScalarSizeInBits();
  if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
    return;
  if (isa<FPToSIInst>(I) && BitWidth >= 17) {
    Lower = APInt(BitWidth, -65504);
    Upper = APInt(BitWidth, 65505);
  }

  if (isa<FPToUIInst>(I) && BitWidth >= 16) {
    // For a fptoui the lower limit is left as 0.
    Upper = APInt(BitWidth, 65505);
  }
}

ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
                                         bool UseInstrInfo, AssumptionCache *AC,
                                         const Instruction *CtxI,
                                         const DominatorTree *DT,
                                         unsigned Depth) {
  assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");

  if (Depth == MaxAnalysisRecursionDepth)
    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());

  const APInt *C;
  if (match(V, m_APInt(C)))
    return ConstantRange(*C);
  unsigned BitWidth = V->getType()->getScalarSizeInBits();

  if (auto *VC = dyn_cast<ConstantDataVector>(V)) {
    ConstantRange CR = ConstantRange::getEmpty(BitWidth);
    for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
         ++ElemIdx)
      CR = CR.unionWith(VC->getElementAsAPInt(ElemIdx));
    return CR;
  }

  InstrInfoQuery IIQ(UseInstrInfo);
  ConstantRange CR = ConstantRange::getFull(BitWidth);
  if (auto *BO = dyn_cast<BinaryOperator>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
    setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  } else if (auto *II = dyn_cast<IntrinsicInst>(V))
    CR = getRangeForIntrinsic(*II);
  else if (auto *SI = dyn_cast<SelectInst>(V)) {
    ConstantRange CRTrue = computeConstantRange(
        SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
    ConstantRange CRFalse = computeConstantRange(
        SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
    CR = CRTrue.unionWith(CRFalse);
    CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
  } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
    setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  }

  if (auto *I = dyn_cast<Instruction>(V))
    if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
      CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));

  if (CtxI && AC) {
    // Try to restrict the range based on information from assumptions.
    for (auto &AssumeVH : AC->assumptionsFor(V)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(AssumeVH);
      assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
             "Got assumption for the wrong function!");
      assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
             "must be an assume intrinsic");

      if (!isValidAssumeForContext(I, CtxI, DT))
        continue;
      Value *Arg = I->getArgOperand(0);
      ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
      // Currently we just use information from comparisons.
      if (!Cmp || Cmp->getOperand(0) != V)
        continue;
      // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
      ConstantRange RHS =
          computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
                               UseInstrInfo, AC, I, DT, Depth + 1);
      CR = CR.intersectWith(
          ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
    }
  }

  return CR;
}
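// Example of the kind of query this answers (illustrative, not from a test):
// for 'udiv i8 %x, 4', computeConstantRange yields a range whose unsigned
// maximum is 63, derived from setLimitsForBinOp's 'udiv x, C' rule above.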