//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Correlated Value Propagation pass.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "correlated-value-propagation"

// Hidden command-line switch (default: on) gating the rewrite of signed
// relational icmp predicates to their unsigned equivalents when LVI proves
// the operand ranges make the two forms agree (see processICmp below).
static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned(
    "canonicalize-icmp-predicates-to-unsigned", cl::init(true), cl::Hidden,
    cl::desc("Enables canonicalization of signed relational predicates to "
             "unsigned (e.g. sgt => ugt)"));

// Pass-wide statistics, incremented by the individual process* helpers.
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
STATISTIC(NumSDivSRemsNarrowed,
          "Number of sdivs/srems whose width was decreased");
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumUDivURemsNarrowed,
          "Number of udivs/urems whose width was decreased");
STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr");
STATISTIC(NumAShrsRemoved, "Number of ashr removed");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
STATISTIC(NumAnd, "Number of ands removed");
STATISTIC(NumNW, "Number of no-wrap deductions");
STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
STATISTIC(NumNUW, "Number of no-unsigned-wrap deductions");
STATISTIC(NumAddNW, "Number of no-wrap deductions for add");
STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add");
STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add");
STATISTIC(NumSubNW, "Number of no-wrap deductions for sub");
STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub");
STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub");
STATISTIC(NumMulNW, "Number of no-wrap deductions for mul");
STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul");
STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul");
STATISTIC(NumShlNW, "Number of no-wrap deductions for shl");
STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl");
STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl");
STATISTIC(NumAbs, "Number of llvm.abs intrinsics removed");
STATISTIC(NumOverflows, "Number of overflow checks removed");
STATISTIC(NumSaturating,
          "Number of saturating arithmetics converted to normal arithmetics");
STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
STATISTIC(NumUDivURemsNarrowedExpanded,
          "Number of bound udiv's/urem's expanded");

/// For each use of a scalar select, ask LVI whether the condition is a known
/// constant at that use (on the incoming edge for phi uses, at the user
/// otherwise) and, if so, rewire the use directly to the matching select arm.
/// Erases the select once it becomes dead.
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
  if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition()))
    return false;

  bool Changed = false;
  // Early-inc iteration: U.set() below mutates the use list while walking it.
  for (Use &U : make_early_inc_range(S->uses())) {
    auto *I = cast<Instruction>(U.getUser());
    Constant *C;
    if (auto *PN = dyn_cast<PHINode>(I))
      // For phi users the condition only needs to be known along the
      // incoming edge feeding this particular use.
      C = LVI->getConstantOnEdge(S->getCondition(), PN->getIncomingBlock(U),
                                 I->getParent(), I);
    else
      C = LVI->getConstant(S->getCondition(), I);

    auto *CI = dyn_cast_or_null<ConstantInt>(C);
    if (!CI)
      continue;

    U.set(CI->isOne() ? S->getTrueValue() : S->getFalseValue());
    Changed = true;
    ++NumSelects;
  }

  if (Changed && S->use_empty())
    S->eraseFromParent();

  return Changed;
}

/// Try to simplify a phi with constant incoming values that match the edge
/// values of a non-constant value on all other edges:
/// bb0:
///   %isnull = icmp eq i8* %x, null
///   br i1 %isnull, label %bb2, label %bb1
/// bb1:
///   br label %bb2
/// bb2:
///   %r = phi i8* [ %x, %bb1 ], [ null, %bb0 ]
/// -->
///   %r = %x
static bool simplifyCommonValuePhi(PHINode *P, LazyValueInfo *LVI,
                                   DominatorTree *DT) {
  // Collect incoming constants and initialize possible common value.
  SmallVector<std::pair<Constant *, unsigned>, 4> IncomingConstants;
  Value *CommonValue = nullptr;
  for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
    Value *Incoming = P->getIncomingValue(i);
    if (auto *IncomingConstant = dyn_cast<Constant>(Incoming)) {
      IncomingConstants.push_back(std::make_pair(IncomingConstant, i));
    } else if (!CommonValue) {
      // The potential common value is initialized to the first non-constant.
      CommonValue = Incoming;
    } else if (Incoming != CommonValue) {
      // There can be only one non-constant common value.
      return false;
    }
  }

  if (!CommonValue || IncomingConstants.empty())
    return false;

  // The common value must be valid in all incoming blocks.
  BasicBlock *ToBB = P->getParent();
  if (auto *CommonInst = dyn_cast<Instruction>(CommonValue))
    if (!DT->dominates(CommonInst, ToBB))
      return false;

  // We have a phi with exactly 1 variable incoming value and 1 or more constant
  // incoming values. See if all constant incoming values can be mapped back to
  // the same incoming variable value.
  for (auto &IncomingConstant : IncomingConstants) {
    Constant *C = IncomingConstant.first;
    BasicBlock *IncomingBB = P->getIncomingBlock(IncomingConstant.second);
    if (C != LVI->getConstantOnEdge(CommonValue, IncomingBB, ToBB, P))
      return false;
  }

  // LVI only guarantees that the value matches a certain constant if the value
  // is not poison. Make sure we don't replace a well-defined value with poison.
  // This is usually satisfied due to a prior branch on the value.
  if (!isGuaranteedNotToBePoison(CommonValue, nullptr, P, DT))
    return false;

  // All constant incoming values map to the same variable along the incoming
  // edges of the phi. The phi is unnecessary.
  P->replaceAllUsesWith(CommonValue);
  P->eraseFromParent();
  ++NumPhiCommon;
  return true;
}

/// Compute a simplified replacement for \p Incoming on the edge
/// \p From -> \p To, or nullptr if none is known: either a constant proven by
/// LVI, or — when \p Incoming is a select — one of the select's arms when LVI
/// can decide the condition (or rule out one arm's constant) on that edge.
static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming,
                             BasicBlock *From, BasicBlock *To,
                             Instruction *CxtI) {
  if (Constant *C = LVI->getConstantOnEdge(Incoming, From, To, CxtI))
    return C;

  // Look if the incoming value is a select with a scalar condition for which
  // LVI can tell us the value. In that case replace the incoming value with
  // the appropriate value of the select. This often allows us to remove the
  // select later.
  auto *SI = dyn_cast<SelectInst>(Incoming);
  if (!SI)
    return nullptr;

  // Once LVI learns to handle vector types, we could also add support
  // for vector type constants that are not all zeroes or all ones.
  Value *Condition = SI->getCondition();
  if (!Condition->getType()->isVectorTy()) {
    if (Constant *C = LVI->getConstantOnEdge(Condition, From, To, CxtI)) {
      if (C->isOneValue())
        return SI->getTrueValue();
      if (C->isZeroValue())
        return SI->getFalseValue();
    }
  }

  // Look if the select has a constant but LVI tells us that the incoming
  // value can never be that constant. In that case replace the incoming
  // value with the other value of the select. This often allows us to
  // remove the select later.

  // The "false" case
  if (auto *C = dyn_cast<Constant>(SI->getFalseValue()))
    if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
        LazyValueInfo::False)
      return SI->getTrueValue();

  // The "true" case,
  // similar to the select "false" case, but try the select "true" value
  if (auto *C = dyn_cast<Constant>(SI->getTrueValue()))
    if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
        LazyValueInfo::False)
      return SI->getFalseValue();

  return nullptr;
}

/// Simplify a phi: replace each non-constant incoming value with an
/// edge-proven value where possible, then try to fold the whole phi away
/// (via InstructionSimplify or the common-incoming-value pattern above).
static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
                       const SimplifyQuery &SQ) {
  bool Changed = false;

  BasicBlock *BB = P->getParent();
  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
    Value *Incoming = P->getIncomingValue(i);
    if (isa<Constant>(Incoming)) continue;

    Value *V = getValueOnEdge(LVI, Incoming, P->getIncomingBlock(i), BB, P);
    if (V) {
      P->setIncomingValue(i, V);
      Changed = true;
    }
  }

  // The edge rewrites above may have made the phi trivially foldable.
  if (Value *V = simplifyInstruction(P, SQ)) {
    P->replaceAllUsesWith(V);
    P->eraseFromParent();
    Changed = true;
  }

  if (!Changed)
    Changed = simplifyCommonValuePhi(P, LVI, DT);

  if (Changed)
    ++NumPhis;

  return Changed;
}

/// If the pointer operand of a load/store is known to be a constant at this
/// instruction, substitute that constant for the pointer.
static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
  Value *Pointer = nullptr;
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    Pointer = L->getPointerOperand();
  else
    Pointer = cast<StoreInst>(I)->getPointerOperand();

  if (isa<Constant>(Pointer)) return false;

  Constant *C = LVI->getConstant(Pointer, I);
  if (!C) return false;

  ++NumMemAccess;
  I->replaceUsesOfWith(Pointer, C);
  return true;
}

/// Canonicalize a signed relational icmp to the equivalent unsigned predicate
/// when LVI's operand ranges prove the two predicates agree.
static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
  if (!CanonicalizeICmpPredicatesToUnsigned)
    return false;

  // Only for signed relational comparisons of scalar integers.
  if (Cmp->getType()->isVectorTy() ||
      !Cmp->getOperand(0)->getType()->isIntegerTy())
    return false;

  if (!Cmp->isSigned())
    return false;

  ICmpInst::Predicate UnsignedPred =
      ConstantRange::getEquivalentPredWithFlippedSignedness(
          Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp),
          LVI->getConstantRange(Cmp->getOperand(1), Cmp));

  if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
    return false;

  ++NumSICmps;
  Cmp->setPredicate(UnsignedPred);

  return true;
}

/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
  Value *Op0 = Cmp->getOperand(0);
  Value *Op1 = Cmp->getOperand(1);
  LazyValueInfo::Tristate Result =
      LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp,
                          /*UseBlockValue=*/true);
  if (Result == LazyValueInfo::Unknown)
    return false;

  ++NumCmps;
  // Result is True (1) or False (0) here; build the matching i1 constant.
  Constant *TorF =
      ConstantInt::get(CmpInst::makeCmpResultType(Op0->getType()), Result);
  Cmp->replaceAllUsesWith(TorF);
  Cmp->eraseFromParent();
  return true;
}

/// Driver for comparisons: first try to fold to a constant, then try the
/// signed-to-unsigned predicate canonicalization.
static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
  if (constantFoldCmp(Cmp, LVI))
    return true;

  if (auto *ICmp = dyn_cast<ICmpInst>(Cmp))
    if (processICmp(ICmp, LVI))
      return true;

  return false;
}

/// Simplify a switch instruction by removing cases which can never fire. If the
/// uselessness of a case could be determined locally then constant propagation
/// would already have figured it out. Instead, walk the predecessors and
/// statically evaluate cases based on information available on that edge.
/// Cases that cannot fire no matter what the incoming edge can safely be
/// removed. If a case fires on every incoming edge then the entire switch can
/// be removed and replaced with a branch to the case destination.
static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
                          DominatorTree *DT) {
  // Lazy strategy: batch DT updates and flush them when the pass is done.
  DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
  Value *Cond = I->getCondition();
  BasicBlock *BB = I->getParent();

  // Analyse each switch case in turn.
  bool Changed = false;
  // Count duplicate edges to each successor so we only delete the DT edge
  // once the last case targeting that successor is removed.
  DenseMap<BasicBlock*, int> SuccessorsCount;
  for (auto *Succ : successors(BB))
    SuccessorsCount[Succ]++;

  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
    SwitchInstProfUpdateWrapper SI(*I);

    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
      ConstantInt *Case = CI->getCaseValue();
      LazyValueInfo::Tristate State =
          LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
                              /* UseBlockValue */ true);

      if (State == LazyValueInfo::False) {
        // This case never fires - remove it.
        BasicBlock *Succ = CI->getCaseSuccessor();
        Succ->removePredecessor(BB);
        CI = SI.removeCase(CI);
        CE = SI->case_end();

        // The condition can be modified by removePredecessor's PHI simplification
        // logic.
        Cond = SI->getCondition();

        ++NumDeadCases;
        Changed = true;
        if (--SuccessorsCount[Succ] == 0)
          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
        continue;
      }
      if (State == LazyValueInfo::True) {
        // This case always fires. Arrange for the switch to be turned into an
        // unconditional branch by replacing the switch condition with the case
        // value.
        SI->setCondition(Case);
        NumDeadCases += SI->getNumCases();
        Changed = true;
        break;
      }

      // Increment the case iterator since we didn't delete it.
      ++CI;
    }
  }

  if (Changed)
    // If the switch has been simplified to the point where it can be replaced
    // by a branch then do so now.
    ConstantFoldTerminator(BB, /*DeleteDeadConditions = */ false,
                           /*TLI = */ nullptr, &DTU);
  return Changed;
}

// See if we can prove that the given binary op intrinsic will not overflow.
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
  ConstantRange LRange = LVI->getConstantRangeAtUse(BO->getOperandUse(0));
  ConstantRange RRange = LVI->getConstantRangeAtUse(BO->getOperandUse(1));
  // The no-wrap region is computed from the RHS range; the op cannot wrap
  // iff the whole LHS range lies inside it.
  ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
      BO->getBinaryOp(), RRange, BO->getNoWrapKind());
  return NWRegion.contains(LRange);
}

/// Set the newly-deduced nsw/nuw flags on \p V (when it is an Instruction)
/// and bump both the aggregate and the per-opcode statistics counters.
static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode,
                                       bool NewNSW, bool NewNUW) {
  Statistic *OpcNW, *OpcNSW, *OpcNUW;
  switch (Opcode) {
  case Instruction::Add:
    OpcNW = &NumAddNW;
    OpcNSW = &NumAddNSW;
    OpcNUW = &NumAddNUW;
    break;
  case Instruction::Sub:
    OpcNW = &NumSubNW;
    OpcNSW = &NumSubNSW;
    OpcNUW = &NumSubNUW;
    break;
  case Instruction::Mul:
    OpcNW = &NumMulNW;
    OpcNSW = &NumMulNSW;
    OpcNUW = &NumMulNUW;
    break;
  case Instruction::Shl:
    OpcNW = &NumShlNW;
    OpcNSW = &NumShlNSW;
    OpcNUW = &NumShlNUW;
    break;
  default:
    llvm_unreachable("Will not be called with other binops");
  }

  // V may be a Constant (e.g. the op folded away); only Instructions carry
  // wrap flags.
  auto *Inst = dyn_cast<Instruction>(V);
  if (NewNSW) {
    ++NumNW;
    ++*OpcNW;
    ++NumNSW;
    ++*OpcNSW;
    if (Inst)
      Inst->setHasNoSignedWrap();
  }
  if (NewNUW) {
    ++NumNW;
    ++*OpcNW;
    ++NumNUW;
    ++*OpcNUW;
    if (Inst)
      Inst->setHasNoUnsignedWrap();
  }
}

// Forward declaration: the intrinsic rewrites below re-run no-wrap deduction
// on the plain binops they create.
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI);

// See if @llvm.abs argument is always positive/negative, and simplify.
// Notably, INT_MIN can belong to either range, regardless of the NSW,
// because it is negation-invariant.
static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
  Value *X = II->getArgOperand(0);
  Type *Ty = X->getType();
  if (!Ty->isIntegerTy())
    return false;

  // Second @llvm.abs argument: whether INT_MIN input produces poison.
  bool IsIntMinPoison = cast<ConstantInt>(II->getArgOperand(1))->isOne();
  APInt IntMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
  ConstantRange Range = LVI->getConstantRangeAtUse(
      II->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison);

  // Is X in [0, IntMin]? NOTE: INT_MIN is fine!
  if (Range.icmp(CmpInst::ICMP_ULE, IntMin)) {
    // abs of a non-negative value is the value itself.
    ++NumAbs;
    II->replaceAllUsesWith(X);
    II->eraseFromParent();
    return true;
  }

  // Is X in [IntMin, 0]? NOTE: INT_MIN is fine!
  if (Range.getSignedMax().isNonPositive()) {
    // abs of a non-positive value is its negation; the negation is nsw only
    // when INT_MIN was poison for the original intrinsic.
    IRBuilder<> B(II);
    Value *NegX = B.CreateNeg(X, II->getName(), /*HasNUW=*/false,
                              /*HasNSW=*/IsIntMinPoison);
    ++NumAbs;
    II->replaceAllUsesWith(NegX);
    II->eraseFromParent();

    // See if we can infer some no-wrap flags.
    if (auto *BO = dyn_cast<BinaryOperator>(NegX))
      processBinOp(BO, LVI);

    return true;
  }

  // Argument's range crosses zero.
  // Can we at least tell that the argument is never INT_MIN?
  if (!IsIntMinPoison && !Range.contains(IntMin)) {
    ++NumNSW;
    ++NumSubNSW;
    // Strengthen the intrinsic: marking INT_MIN as poison is legal since it
    // cannot occur, and gives later passes more freedom.
    II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
    return true;
  }
  return false;
}

// See if this min/max intrinsic always picks its one specific operand.
static bool processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
  // Use the non-strict form so a proven tie still selects a fixed operand.
  CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(MM->getPredicate());
  LazyValueInfo::Tristate Result = LVI->getPredicateAt(
      Pred, MM->getLHS(), MM->getRHS(), MM, /*UseBlockValue=*/true);
  if (Result == LazyValueInfo::Unknown)
    return false;

  ++NumMinMax;
  // Result==True -> LHS (operand 0) wins; False -> RHS (operand 1).
  MM->replaceAllUsesWith(MM->getOperand(!Result));
  MM->eraseFromParent();
  return true;
}

// Rewrite this with.overflow intrinsic as non-overflowing.
static bool processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
  IRBuilder<> B(WO);
  Instruction::BinaryOps Opcode = WO->getBinaryOp();
  bool NSW = WO->isSigned();
  bool NUW = !WO->isSigned();

  Value *NewOp =
      B.CreateBinOp(Opcode, WO->getLHS(), WO->getRHS(), WO->getName());
  setDeducedOverflowingFlags(NewOp, Opcode, NSW, NUW);

  // Rebuild the {result, overflow-bit} aggregate with a constant-false
  // overflow flag; the result slot is filled in by the insertvalue.
  StructType *ST = cast<StructType>(WO->getType());
  Constant *Struct = ConstantStruct::get(ST,
      { PoisonValue::get(ST->getElementType(0)),
        ConstantInt::getFalse(ST->getElementType(1)) });
  Value *NewI = B.CreateInsertValue(Struct, NewOp, 0);
  WO->replaceAllUsesWith(NewI);
  WO->eraseFromParent();
  ++NumOverflows;

  // See if we can infer the other no-wrap too.
  if (auto *BO = dyn_cast<BinaryOperator>(NewOp))
    processBinOp(BO, LVI);

  return true;
}

/// Replace a saturating intrinsic that provably never saturates with the
/// plain binary operator carrying the matching no-wrap flag.
static bool processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) {
  Instruction::BinaryOps Opcode = SI->getBinaryOp();
  bool NSW = SI->isSigned();
  bool NUW = !SI->isSigned();
  BinaryOperator *BinOp = BinaryOperator::Create(
      Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI);
  BinOp->setDebugLoc(SI->getDebugLoc());
  setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW);

  SI->replaceAllUsesWith(BinOp);
  SI->eraseFromParent();
  ++NumSaturating;

  // See if we can infer the other no-wrap too.
  if (auto *BO = dyn_cast<BinaryOperator>(BinOp))
    processBinOp(BO, LVI);

  return true;
}

/// Infer nonnull attributes for the arguments at the specified callsite.
static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {

  // Special-case intrinsics that have dedicated simplifications.
  if (CB.getIntrinsicID() == Intrinsic::abs) {
    return processAbsIntrinsic(&cast<IntrinsicInst>(CB), LVI);
  }

  if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) {
    return processMinMaxIntrinsic(MM, LVI);
  }

  if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
    if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
      return processOverflowIntrinsic(WO, LVI);
    }
  }

  if (auto *SI = dyn_cast<SaturatingInst>(&CB)) {
    if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
      return processSaturatingInst(SI, LVI);
    }
  }

  bool Changed = false;

  // Deopt bundle operands are intended to capture state with minimal
  // perturbance of the code otherwise. If we can find a constant value for
  // any such operand and remove a use of the original value, that's
  // desirable since it may allow further optimization of that value (e.g. via
  // single use rules in instcombine). Since deopt uses tend to,
  // idiomatically, appear along rare conditional paths, it's reasonably likely
  // we may have a conditional fact with which LVI can fold.
  if (auto DeoptBundle = CB.getOperandBundle(LLVMContext::OB_deopt)) {
    for (const Use &ConstU : DeoptBundle->Inputs) {
      Use &U = const_cast<Use&>(ConstU);
      Value *V = U.get();
      if (V->getType()->isVectorTy()) continue;
      if (isa<Constant>(V)) continue;

      Constant *C = LVI->getConstant(V, &CB);
      if (!C) continue;
      U.set(C);
      Changed = true;
    }
  }

  SmallVector<unsigned, 4> ArgNos;
  unsigned ArgNo = 0;

  for (Value *V : CB.args()) {
    PointerType *Type = dyn_cast<PointerType>(V->getType());
    // Try to mark pointer typed parameters as non-null. We skip the
    // relatively expensive analysis for constants which are obviously either
    // null or non-null to start with.
    if (Type && !CB.paramHasAttr(ArgNo, Attribute::NonNull) &&
        !isa<Constant>(V) &&
        LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                            ConstantPointerNull::get(Type), &CB,
                            /*UseBlockValue=*/false) == LazyValueInfo::False)
      ArgNos.push_back(ArgNo);
    ArgNo++;
  }

  assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly.");

  if (ArgNos.empty())
    return Changed;

  NumNonNull += ArgNos.size();
  AttributeList AS = CB.getAttributes();
  LLVMContext &Ctx = CB.getContext();
  AS = AS.addParamAttribute(Ctx, ArgNos,
                            Attribute::get(Ctx, Attribute::NonNull));
  CB.setAttributes(AS);

  return true;
}

// Sign classification of a value's range, used to turn signed div/rem into
// their unsigned counterparts.
enum class Domain { NonNegative, NonPositive, Unknown };

static Domain getDomain(const ConstantRange &CR) {
  if (CR.isAllNonNegative())
    return Domain::NonNegative;
  if (CR.icmp(ICmpInst::ICMP_SLE, APInt::getZero(CR.getBitWidth())))
    return Domain::NonPositive;
  return Domain::Unknown;
}

/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
/// sufficient to contain its operands.
static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
                             const ConstantRange &RCR) {
  assert(Instr->getOpcode() == Instruction::SDiv ||
         Instr->getOpcode() == Instruction::SRem);
  assert(!Instr->getType()->isVectorTy());

  // Find the smallest power of two bitwidth that's sufficient to hold Instr's
  // operands.
  unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();

  // What is the smallest bit width that can accommodate the entire value ranges
  // of both of the operands?
  unsigned MinSignedBits =
      std::max(LCR.getMinSignedBits(), RCR.getMinSignedBits());

  // sdiv/srem is UB if divisor is -1 and dividend is INT_MIN, so unless we can
  // prove that such a combination is impossible, we need to bump the bitwidth.
  if (RCR.contains(APInt::getAllOnes(OrigWidth)) &&
      LCR.contains(APInt::getSignedMinValue(MinSignedBits).sext(OrigWidth)))
    ++MinSignedBits;

  // Don't shrink below 8 bits wide.
  unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8);

  // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
  // two.
  if (NewWidth >= OrigWidth)
    return false;

  ++NumSDivSRemsNarrowed;
  // Truncate the operands, do the op narrow, and sign-extend back.
  IRBuilder<> B{Instr};
  auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
  auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
                                     Instr->getName() + ".lhs.trunc");
  auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
                                     Instr->getName() + ".rhs.trunc");
  auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
  auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext");
  // Only sdiv carries an exact flag to propagate; srem has none.
  if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
    if (BinOp->getOpcode() == Instruction::SDiv)
      BinOp->setIsExact(Instr->isExact());

  Instr->replaceAllUsesWith(Sext);
  Instr->eraseFromParent();
  return true;
}

/// Expand a udiv/urem whose operand ranges bound the quotient to {0,1} into
/// cheap compare/select/sub sequences, or fold it entirely when X u< Y.
static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
                             const ConstantRange &YCR) {
  Type *Ty = Instr->getType();
  assert(Instr->getOpcode() == Instruction::UDiv ||
         Instr->getOpcode() == Instruction::URem);
  assert(!Ty->isVectorTy());
  bool IsRem = Instr->getOpcode() == Instruction::URem;

  Value *X = Instr->getOperand(0);
  Value *Y = Instr->getOperand(1);

  // X u/ Y -> 0 iff X u< Y
  // X u% Y -> X iff X u< Y
  if (XCR.icmp(ICmpInst::ICMP_ULT, YCR)) {
    Instr->replaceAllUsesWith(IsRem ? X : Constant::getNullValue(Ty));
    Instr->eraseFromParent();
    ++NumUDivURemsNarrowedExpanded;
    return true;
  }

  // Given
  //   R  = X u% Y
  // We can represent the modulo operation as a loop/self-recursion:
  //   urem_rec(X, Y):
  //     Z = X - Y
  //     if X u< Y
  //       ret X
  //     else
  //       ret urem_rec(Z, Y)
  // which isn't better, but if we only need a single iteration
  // to compute the answer, this becomes quite good:
  //   R = X < Y ? X : X - Y    iff X u< 2*Y (w/ unsigned saturation)
  // Now, we do not care about all full multiples of Y in X, they do not change
  // the answer, thus we could rewrite the expression as:
  //   X* = X - (Y * |_ X / Y _|)
  //   R  = X* % Y
  // so we don't need the *first* iteration to return, we just need to
  // know *which* iteration will always return, so we could also rewrite it as:
  //   X* = X - (Y * |_ X / Y _|)
  //   R  = X* % Y    iff X* u< 2*Y (w/ unsigned saturation)
  // but that does not seem profitable here.

  // Even if we don't know X's range, the divisor may be so large, X can't ever
  // be 2x larger than that. I.e. if divisor is always negative.
  if (!XCR.icmp(ICmpInst::ICMP_ULT,
                YCR.umul_sat(APInt(YCR.getBitWidth(), 2))) &&
      !YCR.isAllNegative())
    return false;

  IRBuilder<> B(Instr);
  Value *ExpandedOp;
  if (XCR.icmp(ICmpInst::ICMP_UGE, YCR)) {
    // If X is between Y and 2*Y the result is known.
    if (IsRem)
      ExpandedOp = B.CreateNUWSub(X, Y);
    else
      ExpandedOp = ConstantInt::get(Instr->getType(), 1);
  } else if (IsRem) {
    // NOTE: this transformation introduces two uses of X,
    //       but it may be undef so we must freeze it first.
    Value *FrozenX = X;
    if (!isGuaranteedNotToBeUndefOrPoison(X))
      FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
    auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem");
    auto *Cmp =
        B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, Y, Instr->getName() + ".cmp");
    ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX);
  } else {
    // Quotient is 0 or 1: zext of (X u>= Y).
    auto *Cmp =
        B.CreateICmp(ICmpInst::ICMP_UGE, X, Y, Instr->getName() + ".cmp");
    ExpandedOp = B.CreateZExt(Cmp, Ty, Instr->getName() + ".udiv");
  }
  ExpandedOp->takeName(Instr);
  Instr->replaceAllUsesWith(ExpandedOp);
  Instr->eraseFromParent();
  ++NumUDivURemsNarrowedExpanded;
  return true;
}

/// Try to shrink a udiv/urem's width down to the smallest power of two that's
/// sufficient to contain its operands.
static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
                             const ConstantRange &YCR) {
  assert(Instr->getOpcode() == Instruction::UDiv ||
         Instr->getOpcode() == Instruction::URem);
  assert(!Instr->getType()->isVectorTy());

  // Find the smallest power of two bitwidth that's sufficient to hold Instr's
  // operands.

  // What is the smallest bit width that can accommodate the entire value ranges
  // of both of the operands?
  unsigned MaxActiveBits = std::max(XCR.getActiveBits(), YCR.getActiveBits());
  // Don't shrink below 8 bits wide.
  unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8);

  // NewWidth might be greater than OrigWidth if OrigWidth is not a power of
  // two.
  if (NewWidth >= Instr->getType()->getIntegerBitWidth())
    return false;

  ++NumUDivURemsNarrowed;
  // Truncate the operands, do the op narrow, and zero-extend back.
  IRBuilder<> B{Instr};
  auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
  auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
                                     Instr->getName() + ".lhs.trunc");
  auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
                                     Instr->getName() + ".rhs.trunc");
  auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
  auto *Zext = B.CreateZExt(BO, Instr->getType(), Instr->getName() + ".zext");
  // Only udiv carries an exact flag to propagate; urem has none.
  if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
    if (BinOp->getOpcode() == Instruction::UDiv)
      BinOp->setIsExact(Instr->isExact());

  Instr->replaceAllUsesWith(Zext);
  Instr->eraseFromParent();
  return true;
}

/// Driver for scalar udiv/urem: first try the cheap expansion, then the
/// width narrowing.
static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
  assert(Instr->getOpcode() == Instruction::UDiv ||
         Instr->getOpcode() == Instruction::URem);
  if (Instr->getType()->isVectorTy())
    return false;

  ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0));
  ConstantRange YCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1));
  if (expandUDivOrURem(Instr, XCR, YCR))
    return true;

  return narrowUDivOrURem(Instr, XCR, YCR);
}

/// Fold an srem whose result is its own LHS, or rewrite it as urem (plus
/// compensating negations) when the signs of both operands are known.
static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
                        const ConstantRange &RCR, LazyValueInfo *LVI) {
  assert(SDI->getOpcode() == Instruction::SRem);
  assert(!SDI->getType()->isVectorTy());

  // |LHS| u< |RHS| means the remainder is just the LHS.
  if (LCR.abs().icmp(CmpInst::ICMP_ULT, RCR.abs())) {
    SDI->replaceAllUsesWith(SDI->getOperand(0));
    SDI->eraseFromParent();
    return true;
  }

  struct Operand {
    Value *V;
    Domain D;
  };
  std::array<Operand, 2> Ops = {{{SDI->getOperand(0), getDomain(LCR)},
                                 {SDI->getOperand(1), getDomain(RCR)}}};
  if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown)
    return false;

  // We know domains of both of the operands!
  ++NumSRems;

  // We need operands to be non-negative, so negate each one that isn't.
  for (Operand &Op : Ops) {
    if (Op.D == Domain::NonNegative)
      continue;
    auto *BO =
        BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
    BO->setDebugLoc(SDI->getDebugLoc());
    Op.V = BO;
  }

  auto *URem =
      BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
  URem->setDebugLoc(SDI->getDebugLoc());

  auto *Res = URem;

  // If the dividend was non-positive, we need to negate the result.
  if (Ops[0].D == Domain::NonPositive) {
    Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
    Res->setDebugLoc(SDI->getDebugLoc());
  }

  SDI->replaceAllUsesWith(Res);
  SDI->eraseFromParent();

  // Try to simplify our new urem.
  processUDivOrURem(URem, LVI);

  return true;
}

/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove the signs of both operands of this SDiv.
/// If this is the case, replace the SDiv with a UDiv. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
                        const ConstantRange &RCR, LazyValueInfo *LVI) {
  assert(SDI->getOpcode() == Instruction::SDiv);
  assert(!SDI->getType()->isVectorTy());

  struct Operand {
    Value *V;
    Domain D;
  };
  std::array<Operand, 2> Ops = {{{SDI->getOperand(0), getDomain(LCR)},
                                 {SDI->getOperand(1), getDomain(RCR)}}};
  if (Ops[0].D == Domain::Unknown || Ops[1].D == Domain::Unknown)
    return false;

  // We know domains of both of the operands!
  ++NumSDivs;

  // We need operands to be non-negative, so negate each one that isn't.
925 for (Operand &Op : Ops) { 926 if (Op.D == Domain::NonNegative) 927 continue; 928 auto *BO = 929 BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); 930 BO->setDebugLoc(SDI->getDebugLoc()); 931 Op.V = BO; 932 } 933 934 auto *UDiv = 935 BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI); 936 UDiv->setDebugLoc(SDI->getDebugLoc()); 937 UDiv->setIsExact(SDI->isExact()); 938 939 Value *Res = UDiv; 940 941 // If the operands had two different domains, we need to negate the result. 942 if (Ops[0].D != Ops[1].D) 943 Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); 944 945 SDI->replaceAllUsesWith(Res); 946 SDI->eraseFromParent(); 947 948 // Try to simplify our new udiv. 949 processUDivOrURem(UDiv, LVI); 950 951 return true; 952 } 953 954 static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { 955 assert(Instr->getOpcode() == Instruction::SDiv || 956 Instr->getOpcode() == Instruction::SRem); 957 if (Instr->getType()->isVectorTy()) 958 return false; 959 960 ConstantRange LCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0)); 961 ConstantRange RCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(1)); 962 if (Instr->getOpcode() == Instruction::SDiv) 963 if (processSDiv(Instr, LCR, RCR, LVI)) 964 return true; 965 966 if (Instr->getOpcode() == Instruction::SRem) { 967 if (processSRem(Instr, LCR, RCR, LVI)) 968 return true; 969 } 970 971 return narrowSDivOrSRem(Instr, LCR, RCR); 972 } 973 974 static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { 975 if (SDI->getType()->isVectorTy()) 976 return false; 977 978 ConstantRange LRange = 979 LVI->getConstantRangeAtUse(SDI->getOperandUse(0), /*UndefAllowed*/ false); 980 unsigned OrigWidth = SDI->getType()->getIntegerBitWidth(); 981 ConstantRange NegOneOrZero = 982 ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1)); 983 if (NegOneOrZero.contains(LRange)) { 984 // ashr of -1 or 0 never changes the value, so drop the whole 
instruction 985 ++NumAShrsRemoved; 986 SDI->replaceAllUsesWith(SDI->getOperand(0)); 987 SDI->eraseFromParent(); 988 return true; 989 } 990 991 if (!LRange.isAllNonNegative()) 992 return false; 993 994 ++NumAShrsConverted; 995 auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1), 996 "", SDI); 997 BO->takeName(SDI); 998 BO->setDebugLoc(SDI->getDebugLoc()); 999 BO->setIsExact(SDI->isExact()); 1000 SDI->replaceAllUsesWith(BO); 1001 SDI->eraseFromParent(); 1002 1003 return true; 1004 } 1005 1006 static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { 1007 if (SDI->getType()->isVectorTy()) 1008 return false; 1009 1010 const Use &Base = SDI->getOperandUse(0); 1011 if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false) 1012 .isAllNonNegative()) 1013 return false; 1014 1015 ++NumSExt; 1016 auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI); 1017 ZExt->takeName(SDI); 1018 ZExt->setDebugLoc(SDI->getDebugLoc()); 1019 SDI->replaceAllUsesWith(ZExt); 1020 SDI->eraseFromParent(); 1021 1022 return true; 1023 } 1024 1025 static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { 1026 using OBO = OverflowingBinaryOperator; 1027 1028 if (BinOp->getType()->isVectorTy()) 1029 return false; 1030 1031 bool NSW = BinOp->hasNoSignedWrap(); 1032 bool NUW = BinOp->hasNoUnsignedWrap(); 1033 if (NSW && NUW) 1034 return false; 1035 1036 Instruction::BinaryOps Opcode = BinOp->getOpcode(); 1037 Value *LHS = BinOp->getOperand(0); 1038 Value *RHS = BinOp->getOperand(1); 1039 1040 ConstantRange LRange = LVI->getConstantRange(LHS, BinOp); 1041 ConstantRange RRange = LVI->getConstantRange(RHS, BinOp); 1042 1043 bool Changed = false; 1044 bool NewNUW = false, NewNSW = false; 1045 if (!NUW) { 1046 ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( 1047 Opcode, RRange, OBO::NoUnsignedWrap); 1048 NewNUW = NUWRange.contains(LRange); 1049 Changed |= NewNUW; 1050 } 1051 if (!NSW) { 1052 ConstantRange NSWRange = 
ConstantRange::makeGuaranteedNoWrapRegion( 1053 Opcode, RRange, OBO::NoSignedWrap); 1054 NewNSW = NSWRange.contains(LRange); 1055 Changed |= NewNSW; 1056 } 1057 1058 setDeducedOverflowingFlags(BinOp, Opcode, NewNSW, NewNUW); 1059 1060 return Changed; 1061 } 1062 1063 static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { 1064 if (BinOp->getType()->isVectorTy()) 1065 return false; 1066 1067 // Pattern match (and lhs, C) where C includes a superset of bits which might 1068 // be set in lhs. This is a common truncation idiom created by instcombine. 1069 const Use &LHS = BinOp->getOperandUse(0); 1070 ConstantInt *RHS = dyn_cast<ConstantInt>(BinOp->getOperand(1)); 1071 if (!RHS || !RHS->getValue().isMask()) 1072 return false; 1073 1074 // We can only replace the AND with LHS based on range info if the range does 1075 // not include undef. 1076 ConstantRange LRange = 1077 LVI->getConstantRangeAtUse(LHS, /*UndefAllowed=*/false); 1078 if (!LRange.getUnsignedMax().ule(RHS->getValue())) 1079 return false; 1080 1081 BinOp->replaceAllUsesWith(LHS); 1082 BinOp->eraseFromParent(); 1083 NumAnd++; 1084 return true; 1085 } 1086 1087 1088 static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { 1089 if (Constant *C = LVI->getConstant(V, At)) 1090 return C; 1091 1092 // TODO: The following really should be sunk inside LVI's core algorithm, or 1093 // at least the outer shims around such. 1094 auto *C = dyn_cast<CmpInst>(V); 1095 if (!C) return nullptr; 1096 1097 Value *Op0 = C->getOperand(0); 1098 Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); 1099 if (!Op1) return nullptr; 1100 1101 LazyValueInfo::Tristate Result = LVI->getPredicateAt( 1102 C->getPredicate(), Op0, Op1, At, /*UseBlockValue=*/false); 1103 if (Result == LazyValueInfo::Unknown) 1104 return nullptr; 1105 1106 return (Result == LazyValueInfo::True) ? 
1107 ConstantInt::getTrue(C->getContext()) : 1108 ConstantInt::getFalse(C->getContext()); 1109 } 1110 1111 static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, 1112 const SimplifyQuery &SQ) { 1113 bool FnChanged = false; 1114 // Visiting in a pre-order depth-first traversal causes us to simplify early 1115 // blocks before querying later blocks (which require us to analyze early 1116 // blocks). Eagerly simplifying shallow blocks means there is strictly less 1117 // work to do for deep blocks. This also means we don't visit unreachable 1118 // blocks. 1119 for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { 1120 bool BBChanged = false; 1121 for (Instruction &II : llvm::make_early_inc_range(*BB)) { 1122 switch (II.getOpcode()) { 1123 case Instruction::Select: 1124 BBChanged |= processSelect(cast<SelectInst>(&II), LVI); 1125 break; 1126 case Instruction::PHI: 1127 BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ); 1128 break; 1129 case Instruction::ICmp: 1130 case Instruction::FCmp: 1131 BBChanged |= processCmp(cast<CmpInst>(&II), LVI); 1132 break; 1133 case Instruction::Load: 1134 case Instruction::Store: 1135 BBChanged |= processMemAccess(&II, LVI); 1136 break; 1137 case Instruction::Call: 1138 case Instruction::Invoke: 1139 BBChanged |= processCallSite(cast<CallBase>(II), LVI); 1140 break; 1141 case Instruction::SRem: 1142 case Instruction::SDiv: 1143 BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI); 1144 break; 1145 case Instruction::UDiv: 1146 case Instruction::URem: 1147 BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI); 1148 break; 1149 case Instruction::AShr: 1150 BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI); 1151 break; 1152 case Instruction::SExt: 1153 BBChanged |= processSExt(cast<SExtInst>(&II), LVI); 1154 break; 1155 case Instruction::Add: 1156 case Instruction::Sub: 1157 case Instruction::Mul: 1158 case Instruction::Shl: 1159 BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI); 
1160 break; 1161 case Instruction::And: 1162 BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI); 1163 break; 1164 } 1165 } 1166 1167 Instruction *Term = BB->getTerminator(); 1168 switch (Term->getOpcode()) { 1169 case Instruction::Switch: 1170 BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT); 1171 break; 1172 case Instruction::Ret: { 1173 auto *RI = cast<ReturnInst>(Term); 1174 // Try to determine the return value if we can. This is mainly here to 1175 // simplify the writing of unit tests, but also helps to enable IPO by 1176 // constant folding the return values of callees. 1177 auto *RetVal = RI->getReturnValue(); 1178 if (!RetVal) break; // handle "ret void" 1179 if (isa<Constant>(RetVal)) break; // nothing to do 1180 if (auto *C = getConstantAt(RetVal, RI, LVI)) { 1181 ++NumReturns; 1182 RI->replaceUsesOfWith(RetVal, C); 1183 BBChanged = true; 1184 } 1185 } 1186 } 1187 1188 FnChanged |= BBChanged; 1189 } 1190 1191 return FnChanged; 1192 } 1193 1194 PreservedAnalyses 1195 CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { 1196 LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); 1197 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1198 1199 bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F)); 1200 1201 PreservedAnalyses PA; 1202 if (!Changed) { 1203 PA = PreservedAnalyses::all(); 1204 } else { 1205 PA.preserve<DominatorTreeAnalysis>(); 1206 PA.preserve<LazyValueAnalysis>(); 1207 } 1208 1209 // Keeping LVI alive is expensive, both because it uses a lot of memory, and 1210 // because invalidating values in LVI is expensive. While CVP does preserve 1211 // LVI, we know that passes after JumpThreading+CVP will not need the result 1212 // of this analysis, so we forcefully discard it early. 1213 PA.abandon<LazyValueAnalysis>(); 1214 return PA; 1215 } 1216