1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a set of utility VPlan to VPlan transformations. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlanTransforms.h" 15 #include "VPlanDominatorTree.h" 16 #include "VPRecipeBuilder.h" 17 #include "VPlanCFG.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/Analysis/IVDescriptors.h" 21 #include "llvm/Analysis/VectorUtils.h" 22 #include "llvm/IR/Intrinsics.h" 23 24 using namespace llvm; 25 26 void VPlanTransforms::VPInstructionsToVPRecipes( 27 VPlanPtr &Plan, 28 function_ref<const InductionDescriptor *(PHINode *)> 29 GetIntOrFpInductionDescriptor, 30 ScalarEvolution &SE, const TargetLibraryInfo &TLI) { 31 32 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( 33 Plan->getEntry()); 34 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { 35 VPRecipeBase *Term = VPBB->getTerminator(); 36 auto EndIter = Term ? Term->getIterator() : VPBB->end(); 37 // Introduce each ingredient into VPlan. 38 for (VPRecipeBase &Ingredient : 39 make_early_inc_range(make_range(VPBB->begin(), EndIter))) { 40 41 VPValue *VPV = Ingredient.getVPSingleValue(); 42 Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); 43 44 VPRecipeBase *NewRecipe = nullptr; 45 if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { 46 auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); 47 if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { 48 VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue()); 49 VPValue *Step = 50 vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); 51 NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II); 52 } else { 53 Plan->addVPValue(Phi, VPPhi); 54 continue; 55 } 56 } else { 57 assert(isa<VPInstruction>(&Ingredient) && 58 "only VPInstructions expected here"); 59 assert(!isa<PHINode>(Inst) && "phis should be handled above"); 60 // Create VPWidenMemoryInstructionRecipe for loads and stores. 61 if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 62 NewRecipe = new VPWidenMemoryInstructionRecipe( 63 *Load, Ingredient.getOperand(0), nullptr /*Mask*/, 64 false /*Consecutive*/, false /*Reverse*/); 65 } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 66 NewRecipe = new VPWidenMemoryInstructionRecipe( 67 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), 68 nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); 69 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { 70 NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); 71 } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { 72 NewRecipe = 73 new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()), 74 getVectorIntrinsicIDForCall(CI, &TLI)); 75 } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { 76 NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands()); 77 } else if (auto *CI = dyn_cast<CastInst>(Inst)) { 78 NewRecipe = new VPWidenCastRecipe( 79 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI); 80 } else { 81 NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands()); 82 } 83 } 84 85 NewRecipe->insertBefore(&Ingredient); 86 if (NewRecipe->getNumDefinedValues() == 1) 87 VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); 88 else 89 assert(NewRecipe->getNumDefinedValues() == 0 && 90 "Only recpies with zero or one defined values expected"); 91 Ingredient.eraseFromParent(); 92 } 93 } 94 } 95 96 static bool sinkScalarOperands(VPlan &Plan) { 97 auto Iter = vp_depth_first_deep(Plan.getEntry()); 98 bool Changed = false; 99 // First, collect the operands of all recipes in replicate blocks as seeds for 100 // sinking. 101 SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList; 102 for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) { 103 VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock(); 104 if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2) 105 continue; 106 VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]); 107 if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock()) 108 continue; 109 for (auto &Recipe : *VPBB) { 110 for (VPValue *Op : Recipe.operands()) 111 if (auto *Def = Op->getDefiningRecipe()) 112 WorkList.insert(std::make_pair(VPBB, Def)); 113 } 114 } 115 116 bool ScalarVFOnly = Plan.hasScalarVFOnly(); 117 // Try to sink each replicate or scalar IV steps recipe in the worklist. 118 for (unsigned I = 0; I != WorkList.size(); ++I) { 119 VPBasicBlock *SinkTo; 120 VPRecipeBase *SinkCandidate; 121 std::tie(SinkTo, SinkCandidate) = WorkList[I]; 122 if (SinkCandidate->getParent() == SinkTo || 123 SinkCandidate->mayHaveSideEffects() || 124 SinkCandidate->mayReadOrWriteMemory()) 125 continue; 126 if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) { 127 if (!ScalarVFOnly && RepR->isUniform()) 128 continue; 129 } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate)) 130 continue; 131 132 bool NeedsDuplicating = false; 133 // All recipe users of the sink candidate must be in the same block SinkTo 134 // or all users outside of SinkTo must be uniform-after-vectorization ( 135 // i.e., only first lane is used) . In the latter case, we need to duplicate 136 // SinkCandidate. 137 auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, 138 SinkCandidate](VPUser *U) { 139 auto *UI = dyn_cast<VPRecipeBase>(U); 140 if (!UI) 141 return false; 142 if (UI->getParent() == SinkTo) 143 return true; 144 NeedsDuplicating = 145 UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue()); 146 // We only know how to duplicate VPRecipeRecipes for now. 147 return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate); 148 }; 149 if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser)) 150 continue; 151 152 if (NeedsDuplicating) { 153 if (ScalarVFOnly) 154 continue; 155 Instruction *I = cast<Instruction>( 156 cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue()); 157 auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true); 158 // TODO: add ".cloned" suffix to name of Clone's VPValue. 159 160 Clone->insertBefore(SinkCandidate); 161 for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) { 162 auto *UI = cast<VPRecipeBase>(U); 163 if (UI->getParent() == SinkTo) 164 continue; 165 166 for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { 167 if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue()) 168 continue; 169 UI->setOperand(Idx, Clone); 170 } 171 } 172 } 173 SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); 174 for (VPValue *Op : SinkCandidate->operands()) 175 if (auto *Def = Op->getDefiningRecipe()) 176 WorkList.insert(std::make_pair(SinkTo, Def)); 177 Changed = true; 178 } 179 return Changed; 180 } 181 182 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return 183 /// the mask. 184 VPValue *getPredicatedMask(VPRegionBlock *R) { 185 auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry()); 186 if (!EntryBB || EntryBB->size() != 1 || 187 !isa<VPBranchOnMaskRecipe>(EntryBB->begin())) 188 return nullptr; 189 190 return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0); 191 } 192 193 /// If \p R is a triangle region, return the 'then' block of the triangle. 194 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { 195 auto *EntryBB = cast<VPBasicBlock>(R->getEntry()); 196 if (EntryBB->getNumSuccessors() != 2) 197 return nullptr; 198 199 auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]); 200 auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]); 201 if (!Succ0 || !Succ1) 202 return nullptr; 203 204 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) 205 return nullptr; 206 if (Succ0->getSingleSuccessor() == Succ1) 207 return Succ0; 208 if (Succ1->getSingleSuccessor() == Succ0) 209 return Succ1; 210 return nullptr; 211 } 212 213 // Merge replicate regions in their successor region, if a replicate region 214 // is connected to a successor replicate region with the same predicate by a 215 // single, empty VPBasicBlock. 216 static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { 217 SetVector<VPRegionBlock *> DeletedRegions; 218 219 // Collect replicate regions followed by an empty block, followed by another 220 // replicate region with matching masks to process front. This is to avoid 221 // iterator invalidation issues while merging regions. 222 SmallVector<VPRegionBlock *, 8> WorkList; 223 for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>( 224 vp_depth_first_deep(Plan.getEntry()))) { 225 if (!Region1->isReplicator()) 226 continue; 227 auto *MiddleBasicBlock = 228 dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor()); 229 if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) 230 continue; 231 232 auto *Region2 = 233 dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 234 if (!Region2 || !Region2->isReplicator()) 235 continue; 236 237 VPValue *Mask1 = getPredicatedMask(Region1); 238 VPValue *Mask2 = getPredicatedMask(Region2); 239 if (!Mask1 || Mask1 != Mask2) 240 continue; 241 242 assert(Mask1 && Mask2 && "both region must have conditions"); 243 WorkList.push_back(Region1); 244 } 245 246 // Move recipes from Region1 to its successor region, if both are triangles. 247 for (VPRegionBlock *Region1 : WorkList) { 248 if (DeletedRegions.contains(Region1)) 249 continue; 250 auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor()); 251 auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 252 253 VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); 254 VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); 255 if (!Then1 || !Then2) 256 continue; 257 258 // Note: No fusion-preventing memory dependencies are expected in either 259 // region. Such dependencies should be rejected during earlier dependence 260 // checks, which guarantee accesses can be re-ordered for vectorization. 261 // 262 // Move recipes to the successor region. 263 for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) 264 ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); 265 266 auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor()); 267 auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor()); 268 269 // Move VPPredInstPHIRecipes from the merge block to the successor region's 270 // merge block. Update all users inside the successor region to use the 271 // original values. 272 for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { 273 VPValue *PredInst1 = 274 cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); 275 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); 276 for (VPUser *U : to_vector(Phi1ToMoveV->users())) { 277 auto *UI = dyn_cast<VPRecipeBase>(U); 278 if (!UI || UI->getParent() != Then2) 279 continue; 280 for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { 281 if (Phi1ToMoveV != U->getOperand(I)) 282 continue; 283 U->setOperand(I, PredInst1); 284 } 285 } 286 287 Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); 288 } 289 290 // Finally, remove the first region. 291 for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { 292 VPBlockUtils::disconnectBlocks(Pred, Region1); 293 VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); 294 } 295 VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); 296 DeletedRegions.insert(Region1); 297 } 298 299 for (VPRegionBlock *ToDelete : DeletedRegions) 300 delete ToDelete; 301 return !DeletedRegions.empty(); 302 } 303 304 static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, 305 VPlan &Plan) { 306 Instruction *Instr = PredRecipe->getUnderlyingInstr(); 307 // Build the triangular if-then region. 308 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str(); 309 assert(Instr->getParent() && "Predicated instruction not in any basic block"); 310 auto *BlockInMask = PredRecipe->getMask(); 311 auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask); 312 auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe); 313 314 // Replace predicated replicate recipe with a replicate recipe without a 315 // mask but in the replicate region. 316 auto *RecipeWithoutMask = new VPReplicateRecipe( 317 PredRecipe->getUnderlyingInstr(), 318 make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())), 319 PredRecipe->isUniform()); 320 auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask); 321 322 VPPredInstPHIRecipe *PHIRecipe = nullptr; 323 if (PredRecipe->getNumUsers() != 0) { 324 PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask); 325 PredRecipe->replaceAllUsesWith(PHIRecipe); 326 PHIRecipe->setOperand(0, RecipeWithoutMask); 327 } 328 PredRecipe->eraseFromParent(); 329 auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); 330 VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); 331 332 // Note: first set Entry as region entry and then connect successors starting 333 // from it in order, to propagate the "parent" of each VPBasicBlock. 334 VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry); 335 VPBlockUtils::connectBlocks(Pred, Exiting); 336 337 return Region; 338 } 339 340 static void addReplicateRegions(VPlan &Plan) { 341 SmallVector<VPReplicateRecipe *> WorkList; 342 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( 343 vp_depth_first_deep(Plan.getEntry()))) { 344 for (VPRecipeBase &R : *VPBB) 345 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) { 346 if (RepR->isPredicated()) 347 WorkList.push_back(RepR); 348 } 349 } 350 351 unsigned BBNum = 0; 352 for (VPReplicateRecipe *RepR : WorkList) { 353 VPBasicBlock *CurrentBlock = RepR->getParent(); 354 VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator()); 355 356 BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent(); 357 SplitBlock->setName( 358 OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : ""); 359 // Record predicated instructions for above packing optimizations. 360 VPBlockBase *Region = createReplicateRegion(RepR, Plan); 361 Region->setParent(CurrentBlock->getParent()); 362 VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock); 363 VPBlockUtils::connectBlocks(CurrentBlock, Region); 364 VPBlockUtils::connectBlocks(Region, SplitBlock); 365 } 366 } 367 368 void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) { 369 // Convert masked VPReplicateRecipes to if-then region blocks. 370 addReplicateRegions(Plan); 371 372 bool ShouldSimplify = true; 373 while (ShouldSimplify) { 374 ShouldSimplify = sinkScalarOperands(Plan); 375 ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan); 376 ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan); 377 } 378 } 379 bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { 380 SmallVector<VPBasicBlock *> WorkList; 381 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( 382 vp_depth_first_deep(Plan.getEntry()))) { 383 auto *PredVPBB = 384 dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor()); 385 if (PredVPBB && PredVPBB->getNumSuccessors() == 1) 386 WorkList.push_back(VPBB); 387 } 388 389 for (VPBasicBlock *VPBB : WorkList) { 390 VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor()); 391 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) 392 R.moveBefore(*PredVPBB, PredVPBB->end()); 393 VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); 394 auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent()); 395 if (ParentRegion && ParentRegion->getExiting() == VPBB) 396 ParentRegion->setExiting(PredVPBB); 397 for (auto *Succ : to_vector(VPBB->successors())) { 398 VPBlockUtils::disconnectBlocks(VPBB, Succ); 399 VPBlockUtils::connectBlocks(PredVPBB, Succ); 400 } 401 delete VPBB; 402 } 403 return !WorkList.empty(); 404 } 405 406 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { 407 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 408 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 409 if (!IV || IV->getTruncInst()) 410 continue; 411 412 // A sequence of IR Casts has potentially been recorded for IV, which 413 // *must be bypassed* when the IV is vectorized, because the vectorized IV 414 // will produce the desired casted value. This sequence forms a def-use 415 // chain and is provided in reverse order, ending with the cast that uses 416 // the IV phi. Search for the recipe of the last cast in the chain and 417 // replace it with the original IV. Note that only the final cast is 418 // expected to have users outside the cast-chain and the dead casts left 419 // over will be cleaned up later. 420 auto &Casts = IV->getInductionDescriptor().getCastInsts(); 421 VPValue *FindMyCast = IV; 422 for (Instruction *IRCast : reverse(Casts)) { 423 VPRecipeBase *FoundUserCast = nullptr; 424 for (auto *U : FindMyCast->users()) { 425 auto *UserCast = cast<VPRecipeBase>(U); 426 if (UserCast->getNumDefinedValues() == 1 && 427 UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { 428 FoundUserCast = UserCast; 429 break; 430 } 431 } 432 FindMyCast = FoundUserCast->getVPSingleValue(); 433 } 434 FindMyCast->replaceAllUsesWith(IV); 435 } 436 } 437 438 void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { 439 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); 440 VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; 441 for (VPUser *U : CanonicalIV->users()) { 442 WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); 443 if (WidenNewIV) 444 break; 445 } 446 447 if (!WidenNewIV) 448 return; 449 450 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 451 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 452 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 453 454 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || 455 WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) 456 continue; 457 458 // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides 459 // everything WidenNewIV's users need. That is, WidenOriginalIV will 460 // generate a vector phi or all users of WidenNewIV demand the first lane 461 // only. 462 if (any_of(WidenOriginalIV->users(), 463 [WidenOriginalIV](VPUser *U) { 464 return !U->usesScalars(WidenOriginalIV); 465 }) || 466 vputils::onlyFirstLaneUsed(WidenNewIV)) { 467 WidenNewIV->replaceAllUsesWith(WidenOriginalIV); 468 WidenNewIV->eraseFromParent(); 469 return; 470 } 471 } 472 } 473 474 void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { 475 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( 476 Plan.getEntry()); 477 478 for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) { 479 // The recipes in the block are processed in reverse order, to catch chains 480 // of dead recipes. 481 for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { 482 if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { 483 return V->getNumUsers() > 0; 484 })) 485 continue; 486 R.eraseFromParent(); 487 } 488 } 489 } 490 491 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { 492 SmallVector<VPRecipeBase *> ToRemove; 493 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 494 bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); 495 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 496 auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 497 if (!WideIV) 498 continue; 499 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) { 500 return U->usesScalars(WideIV); 501 })) 502 continue; 503 504 auto IP = HeaderVPBB->getFirstNonPhi(); 505 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); 506 Type *ResultTy = WideIV->getPHINode()->getType(); 507 if (Instruction *TruncI = WideIV->getTruncInst()) 508 ResultTy = TruncI->getType(); 509 const InductionDescriptor &ID = WideIV->getInductionDescriptor(); 510 VPValue *Step = WideIV->getStepValue(); 511 VPValue *BaseIV = CanonicalIV; 512 if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step, 513 ResultTy)) { 514 BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV, 515 Step, ResultTy); 516 HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); 517 } 518 519 VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); 520 HeaderVPBB->insert(Steps, IP); 521 522 // Update scalar users of IV to use Step instead. Use SetVector to ensure 523 // the list of users doesn't contain duplicates. 524 SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end()); 525 for (VPUser *U : Users) { 526 if (HasOnlyVectorVFs && !U->usesScalars(WideIV)) 527 continue; 528 for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { 529 if (U->getOperand(I) != WideIV) 530 continue; 531 U->setOperand(I, Steps); 532 } 533 } 534 } 535 } 536 537 void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) { 538 DenseMap<const SCEV *, VPValue *> SCEV2VPV; 539 540 for (VPRecipeBase &R : 541 make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) { 542 auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R); 543 if (!ExpR) 544 continue; 545 546 auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR}); 547 if (I.second) 548 continue; 549 ExpR->replaceAllUsesWith(I.first->second); 550 ExpR->eraseFromParent(); 551 } 552 } 553 554 static bool canSimplifyBranchOnCond(VPInstruction *Term) { 555 VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0)); 556 if (!Not || Not->getOpcode() != VPInstruction::Not) 557 return false; 558 559 VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0)); 560 return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask; 561 } 562 563 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, 564 unsigned BestUF, 565 PredicatedScalarEvolution &PSE) { 566 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan"); 567 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan"); 568 VPBasicBlock *ExitingVPBB = 569 Plan.getVectorLoopRegion()->getExitingBasicBlock(); 570 auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back()); 571 // Try to simplify the branch condition if TC <= VF * UF when preparing to 572 // execute the plan for the main vector loop. We only do this if the 573 // terminator is: 574 // 1. BranchOnCount, or 575 // 2. BranchOnCond where the input is Not(ActiveLaneMask). 576 if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount && 577 (Term->getOpcode() != VPInstruction::BranchOnCond || 578 !canSimplifyBranchOnCond(Term)))) 579 return; 580 581 Type *IdxTy = 582 Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType(); 583 const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE); 584 ScalarEvolution &SE = *PSE.getSE(); 585 const SCEV *C = 586 SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF); 587 if (TripCount->isZero() || 588 !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) 589 return; 590 591 LLVMContext &Ctx = SE.getContext(); 592 auto *BOC = new VPInstruction( 593 VPInstruction::BranchOnCond, 594 {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))}); 595 Term->eraseFromParent(); 596 ExitingVPBB->appendRecipe(BOC); 597 Plan.setVF(BestVF); 598 Plan.setUF(BestUF); 599 // TODO: Further simplifications are possible 600 // 1. Replace inductions with constants. 601 // 2. Replace vector loop region with VPBasicBlock. 602 } 603 604 #ifndef NDEBUG 605 static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) { 606 auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); 607 if (Region && Region->isReplicator()) { 608 assert(Region->getNumSuccessors() == 1 && 609 Region->getNumPredecessors() == 1 && "Expected SESE region!"); 610 assert(R->getParent()->size() == 1 && 611 "A recipe in an original replicator region must be the only " 612 "recipe in its block"); 613 return Region; 614 } 615 return nullptr; 616 } 617 #endif 618 619 static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B, 620 VPDominatorTree &VPDT) { 621 if (A == B) 622 return false; 623 624 auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) { 625 for (auto &R : *A->getParent()) { 626 if (&R == A) 627 return true; 628 if (&R == B) 629 return false; 630 } 631 llvm_unreachable("recipe not found"); 632 }; 633 const VPBlockBase *ParentA = A->getParent(); 634 const VPBlockBase *ParentB = B->getParent(); 635 if (ParentA == ParentB) 636 return LocalComesBefore(A, B); 637 638 assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && 639 "No replicate regions expected at this point"); 640 assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && 641 "No replicate regions expected at this point"); 642 return VPDT.properlyDominates(ParentA, ParentB); 643 } 644 645 /// Sink users of \p FOR after the recipe defining the previous value \p 646 /// Previous of the recurrence. \returns true if all users of \p FOR could be 647 /// re-arranged as needed or false if it is not possible. 648 static bool 649 sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, 650 VPRecipeBase *Previous, 651 VPDominatorTree &VPDT) { 652 // Collect recipes that need sinking. 653 SmallVector<VPRecipeBase *> WorkList; 654 SmallPtrSet<VPRecipeBase *, 8> Seen; 655 Seen.insert(Previous); 656 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) { 657 // The previous value must not depend on the users of the recurrence phi. In 658 // that case, FOR is not a fixed order recurrence. 659 if (SinkCandidate == Previous) 660 return false; 661 662 if (isa<VPHeaderPHIRecipe>(SinkCandidate) || 663 !Seen.insert(SinkCandidate).second || 664 properlyDominates(Previous, SinkCandidate, VPDT)) 665 return true; 666 667 if (SinkCandidate->mayHaveSideEffects()) 668 return false; 669 670 WorkList.push_back(SinkCandidate); 671 return true; 672 }; 673 674 // Recursively sink users of FOR after Previous. 675 WorkList.push_back(FOR); 676 for (unsigned I = 0; I != WorkList.size(); ++I) { 677 VPRecipeBase *Current = WorkList[I]; 678 assert(Current->getNumDefinedValues() == 1 && 679 "only recipes with a single defined value expected"); 680 681 for (VPUser *User : Current->getVPSingleValue()->users()) { 682 if (auto *R = dyn_cast<VPRecipeBase>(User)) 683 if (!TryToPushSinkCandidate(R)) 684 return false; 685 } 686 } 687 688 // Keep recipes to sink ordered by dominance so earlier instructions are 689 // processed first. 690 sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) { 691 return properlyDominates(A, B, VPDT); 692 }); 693 694 for (VPRecipeBase *SinkCandidate : WorkList) { 695 if (SinkCandidate == FOR) 696 continue; 697 698 SinkCandidate->moveAfter(Previous); 699 Previous = SinkCandidate; 700 } 701 return true; 702 } 703 704 bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, 705 VPBuilder &Builder) { 706 VPDominatorTree VPDT; 707 VPDT.recalculate(Plan); 708 709 SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis; 710 for (VPRecipeBase &R : 711 Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis()) 712 if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) 713 RecurrencePhis.push_back(FOR); 714 715 for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) { 716 SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis; 717 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe(); 718 // Fixed-order recurrences do not contain cycles, so this loop is guaranteed 719 // to terminate. 720 while (auto *PrevPhi = 721 dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) { 722 assert(PrevPhi->getParent() == FOR->getParent()); 723 assert(SeenPhis.insert(PrevPhi).second); 724 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe(); 725 } 726 727 if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT)) 728 return false; 729 730 // Introduce a recipe to combine the incoming and previous values of a 731 // fixed-order recurrence. 732 VPBasicBlock *InsertBlock = Previous->getParent(); 733 if (isa<VPHeaderPHIRecipe>(Previous)) 734 Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi()); 735 else 736 Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator())); 737 738 auto *RecurSplice = cast<VPInstruction>( 739 Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, 740 {FOR, FOR->getBackedgeValue()})); 741 742 FOR->replaceAllUsesWith(RecurSplice); 743 // Set the first operand of RecurSplice to FOR again, after replacing 744 // all users. 745 RecurSplice->setOperand(0, FOR); 746 } 747 return true; 748 } 749 750 void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) { 751 for (VPRecipeBase &R : 752 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 753 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); 754 if (!PhiR) 755 continue; 756 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); 757 RecurKind RK = RdxDesc.getRecurrenceKind(); 758 if (RK != RecurKind::Add && RK != RecurKind::Mul) 759 continue; 760 761 SmallSetVector<VPValue *, 8> Worklist; 762 Worklist.insert(PhiR); 763 764 for (unsigned I = 0; I != Worklist.size(); ++I) { 765 VPValue *Cur = Worklist[I]; 766 if (auto *RecWithFlags = 767 dyn_cast<VPRecipeWithIRFlags>(Cur->getDefiningRecipe())) { 768 RecWithFlags->dropPoisonGeneratingFlags(); 769 } 770 771 for (VPUser *U : Cur->users()) { 772 auto *UserRecipe = dyn_cast<VPRecipeBase>(U); 773 if (!UserRecipe) 774 continue; 775 for (VPValue *V : UserRecipe->definedValues()) 776 Worklist.insert(V); 777 } 778 } 779 } 780 } 781