1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a set of utility VPlan to VPlan transformations.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlanTransforms.h"
15 #include "VPlanDominatorTree.h"
16 #include "VPRecipeBuilder.h"
17 #include "VPlanCFG.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/Analysis/IVDescriptors.h"
21 #include "llvm/Analysis/VectorUtils.h"
22 #include "llvm/IR/Intrinsics.h"
23 
24 using namespace llvm;
25 
26 void VPlanTransforms::VPInstructionsToVPRecipes(
27     VPlanPtr &Plan,
28     function_ref<const InductionDescriptor *(PHINode *)>
29         GetIntOrFpInductionDescriptor,
30     ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
31 
32   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
33       Plan->getEntry());
34   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
35     VPRecipeBase *Term = VPBB->getTerminator();
36     auto EndIter = Term ? Term->getIterator() : VPBB->end();
37     // Introduce each ingredient into VPlan.
38     for (VPRecipeBase &Ingredient :
39          make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
40 
41       VPValue *VPV = Ingredient.getVPSingleValue();
42       Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
43 
44       VPRecipeBase *NewRecipe = nullptr;
45       if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
46         auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
47         if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
48           VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue());
49           VPValue *Step =
50               vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
51           NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
52         } else {
53           Plan->addVPValue(Phi, VPPhi);
54           continue;
55         }
56       } else {
57         assert(isa<VPInstruction>(&Ingredient) &&
58                "only VPInstructions expected here");
59         assert(!isa<PHINode>(Inst) && "phis should be handled above");
60         // Create VPWidenMemoryInstructionRecipe for loads and stores.
61         if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
62           NewRecipe = new VPWidenMemoryInstructionRecipe(
63               *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
64               false /*Consecutive*/, false /*Reverse*/);
65         } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
66           NewRecipe = new VPWidenMemoryInstructionRecipe(
67               *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
68               nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
69         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
70           NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
71         } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
72           NewRecipe =
73               new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()),
74                                     getVectorIntrinsicIDForCall(CI, &TLI));
75         } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
76           NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
77         } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
78           NewRecipe = new VPWidenCastRecipe(
79               CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI);
80         } else {
81           NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
82         }
83       }
84 
85       NewRecipe->insertBefore(&Ingredient);
86       if (NewRecipe->getNumDefinedValues() == 1)
87         VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
88       else
89         assert(NewRecipe->getNumDefinedValues() == 0 &&
90                "Only recpies with zero or one defined values expected");
91       Ingredient.eraseFromParent();
92     }
93   }
94 }
95 
96 static bool sinkScalarOperands(VPlan &Plan) {
97   auto Iter = vp_depth_first_deep(Plan.getEntry());
98   bool Changed = false;
99   // First, collect the operands of all recipes in replicate blocks as seeds for
100   // sinking.
101   SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList;
102   for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
103     VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
104     if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
105       continue;
106     VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
107     if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
108       continue;
109     for (auto &Recipe : *VPBB) {
110       for (VPValue *Op : Recipe.operands())
111         if (auto *Def = Op->getDefiningRecipe())
112           WorkList.insert(std::make_pair(VPBB, Def));
113     }
114   }
115 
116   bool ScalarVFOnly = Plan.hasScalarVFOnly();
117   // Try to sink each replicate or scalar IV steps recipe in the worklist.
118   for (unsigned I = 0; I != WorkList.size(); ++I) {
119     VPBasicBlock *SinkTo;
120     VPRecipeBase *SinkCandidate;
121     std::tie(SinkTo, SinkCandidate) = WorkList[I];
122     if (SinkCandidate->getParent() == SinkTo ||
123         SinkCandidate->mayHaveSideEffects() ||
124         SinkCandidate->mayReadOrWriteMemory())
125       continue;
126     if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
127       if (!ScalarVFOnly && RepR->isUniform())
128         continue;
129     } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
130       continue;
131 
132     bool NeedsDuplicating = false;
133     // All recipe users of the sink candidate must be in the same block SinkTo
134     // or all users outside of SinkTo must be uniform-after-vectorization (
135     // i.e., only first lane is used) . In the latter case, we need to duplicate
136     // SinkCandidate.
137     auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
138                             SinkCandidate](VPUser *U) {
139       auto *UI = dyn_cast<VPRecipeBase>(U);
140       if (!UI)
141         return false;
142       if (UI->getParent() == SinkTo)
143         return true;
144       NeedsDuplicating =
145           UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue());
146       // We only know how to duplicate VPRecipeRecipes for now.
147       return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
148     };
149     if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser))
150       continue;
151 
152     if (NeedsDuplicating) {
153       if (ScalarVFOnly)
154         continue;
155       Instruction *I = cast<Instruction>(
156           cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue());
157       auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
158       // TODO: add ".cloned" suffix to name of Clone's VPValue.
159 
160       Clone->insertBefore(SinkCandidate);
161       for (auto *U : to_vector(SinkCandidate->getVPSingleValue()->users())) {
162         auto *UI = cast<VPRecipeBase>(U);
163         if (UI->getParent() == SinkTo)
164           continue;
165 
166         for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
167           if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue())
168             continue;
169           UI->setOperand(Idx, Clone);
170         }
171       }
172     }
173     SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
174     for (VPValue *Op : SinkCandidate->operands())
175       if (auto *Def = Op->getDefiningRecipe())
176         WorkList.insert(std::make_pair(SinkTo, Def));
177     Changed = true;
178   }
179   return Changed;
180 }
181 
182 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
183 /// the mask.
184 VPValue *getPredicatedMask(VPRegionBlock *R) {
185   auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
186   if (!EntryBB || EntryBB->size() != 1 ||
187       !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
188     return nullptr;
189 
190   return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
191 }
192 
193 /// If \p R is a triangle region, return the 'then' block of the triangle.
194 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
195   auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
196   if (EntryBB->getNumSuccessors() != 2)
197     return nullptr;
198 
199   auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
200   auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
201   if (!Succ0 || !Succ1)
202     return nullptr;
203 
204   if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
205     return nullptr;
206   if (Succ0->getSingleSuccessor() == Succ1)
207     return Succ0;
208   if (Succ1->getSingleSuccessor() == Succ0)
209     return Succ1;
210   return nullptr;
211 }
212 
213 // Merge replicate regions in their successor region, if a replicate region
214 // is connected to a successor replicate region with the same predicate by a
215 // single, empty VPBasicBlock.
216 static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
217   SetVector<VPRegionBlock *> DeletedRegions;
218 
219   // Collect replicate regions followed by an empty block, followed by another
220   // replicate region with matching masks to process front. This is to avoid
221   // iterator invalidation issues while merging regions.
222   SmallVector<VPRegionBlock *, 8> WorkList;
223   for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
224            vp_depth_first_deep(Plan.getEntry()))) {
225     if (!Region1->isReplicator())
226       continue;
227     auto *MiddleBasicBlock =
228         dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
229     if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
230       continue;
231 
232     auto *Region2 =
233         dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
234     if (!Region2 || !Region2->isReplicator())
235       continue;
236 
237     VPValue *Mask1 = getPredicatedMask(Region1);
238     VPValue *Mask2 = getPredicatedMask(Region2);
239     if (!Mask1 || Mask1 != Mask2)
240       continue;
241 
242     assert(Mask1 && Mask2 && "both region must have conditions");
243     WorkList.push_back(Region1);
244   }
245 
246   // Move recipes from Region1 to its successor region, if both are triangles.
247   for (VPRegionBlock *Region1 : WorkList) {
248     if (DeletedRegions.contains(Region1))
249       continue;
250     auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
251     auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
252 
253     VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
254     VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
255     if (!Then1 || !Then2)
256       continue;
257 
258     // Note: No fusion-preventing memory dependencies are expected in either
259     // region. Such dependencies should be rejected during earlier dependence
260     // checks, which guarantee accesses can be re-ordered for vectorization.
261     //
262     // Move recipes to the successor region.
263     for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
264       ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
265 
266     auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
267     auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());
268 
269     // Move VPPredInstPHIRecipes from the merge block to the successor region's
270     // merge block. Update all users inside the successor region to use the
271     // original values.
272     for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
273       VPValue *PredInst1 =
274           cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
275       VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
276       for (VPUser *U : to_vector(Phi1ToMoveV->users())) {
277         auto *UI = dyn_cast<VPRecipeBase>(U);
278         if (!UI || UI->getParent() != Then2)
279           continue;
280         for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
281           if (Phi1ToMoveV != U->getOperand(I))
282             continue;
283           U->setOperand(I, PredInst1);
284         }
285       }
286 
287       Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
288     }
289 
290     // Finally, remove the first region.
291     for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
292       VPBlockUtils::disconnectBlocks(Pred, Region1);
293       VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
294     }
295     VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
296     DeletedRegions.insert(Region1);
297   }
298 
299   for (VPRegionBlock *ToDelete : DeletedRegions)
300     delete ToDelete;
301   return !DeletedRegions.empty();
302 }
303 
304 static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
305                                             VPlan &Plan) {
306   Instruction *Instr = PredRecipe->getUnderlyingInstr();
307   // Build the triangular if-then region.
308   std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
309   assert(Instr->getParent() && "Predicated instruction not in any basic block");
310   auto *BlockInMask = PredRecipe->getMask();
311   auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
312   auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
313 
314   // Replace predicated replicate recipe with a replicate recipe without a
315   // mask but in the replicate region.
316   auto *RecipeWithoutMask = new VPReplicateRecipe(
317       PredRecipe->getUnderlyingInstr(),
318       make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
319       PredRecipe->isUniform());
320   auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
321 
322   VPPredInstPHIRecipe *PHIRecipe = nullptr;
323   if (PredRecipe->getNumUsers() != 0) {
324     PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
325     PredRecipe->replaceAllUsesWith(PHIRecipe);
326     PHIRecipe->setOperand(0, RecipeWithoutMask);
327   }
328   PredRecipe->eraseFromParent();
329   auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
330   VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
331 
332   // Note: first set Entry as region entry and then connect successors starting
333   // from it in order, to propagate the "parent" of each VPBasicBlock.
334   VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
335   VPBlockUtils::connectBlocks(Pred, Exiting);
336 
337   return Region;
338 }
339 
340 static void addReplicateRegions(VPlan &Plan) {
341   SmallVector<VPReplicateRecipe *> WorkList;
342   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
343            vp_depth_first_deep(Plan.getEntry()))) {
344     for (VPRecipeBase &R : *VPBB)
345       if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
346         if (RepR->isPredicated())
347           WorkList.push_back(RepR);
348       }
349   }
350 
351   unsigned BBNum = 0;
352   for (VPReplicateRecipe *RepR : WorkList) {
353     VPBasicBlock *CurrentBlock = RepR->getParent();
354     VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
355 
356     BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
357     SplitBlock->setName(
358         OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
359     // Record predicated instructions for above packing optimizations.
360     VPBlockBase *Region = createReplicateRegion(RepR, Plan);
361     Region->setParent(CurrentBlock->getParent());
362     VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
363     VPBlockUtils::connectBlocks(CurrentBlock, Region);
364     VPBlockUtils::connectBlocks(Region, SplitBlock);
365   }
366 }
367 
368 void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
369   // Convert masked VPReplicateRecipes to if-then region blocks.
370   addReplicateRegions(Plan);
371 
372   bool ShouldSimplify = true;
373   while (ShouldSimplify) {
374     ShouldSimplify = sinkScalarOperands(Plan);
375     ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
376     ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan);
377   }
378 }
379 bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
380   SmallVector<VPBasicBlock *> WorkList;
381   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
382            vp_depth_first_deep(Plan.getEntry()))) {
383     auto *PredVPBB =
384         dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
385     if (PredVPBB && PredVPBB->getNumSuccessors() == 1)
386       WorkList.push_back(VPBB);
387   }
388 
389   for (VPBasicBlock *VPBB : WorkList) {
390     VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
391     for (VPRecipeBase &R : make_early_inc_range(*VPBB))
392       R.moveBefore(*PredVPBB, PredVPBB->end());
393     VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
394     auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
395     if (ParentRegion && ParentRegion->getExiting() == VPBB)
396       ParentRegion->setExiting(PredVPBB);
397     for (auto *Succ : to_vector(VPBB->successors())) {
398       VPBlockUtils::disconnectBlocks(VPBB, Succ);
399       VPBlockUtils::connectBlocks(PredVPBB, Succ);
400     }
401     delete VPBB;
402   }
403   return !WorkList.empty();
404 }
405 
406 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
407   for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
408     auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
409     if (!IV || IV->getTruncInst())
410       continue;
411 
412     // A sequence of IR Casts has potentially been recorded for IV, which
413     // *must be bypassed* when the IV is vectorized, because the vectorized IV
414     // will produce the desired casted value. This sequence forms a def-use
415     // chain and is provided in reverse order, ending with the cast that uses
416     // the IV phi. Search for the recipe of the last cast in the chain and
417     // replace it with the original IV. Note that only the final cast is
418     // expected to have users outside the cast-chain and the dead casts left
419     // over will be cleaned up later.
420     auto &Casts = IV->getInductionDescriptor().getCastInsts();
421     VPValue *FindMyCast = IV;
422     for (Instruction *IRCast : reverse(Casts)) {
423       VPRecipeBase *FoundUserCast = nullptr;
424       for (auto *U : FindMyCast->users()) {
425         auto *UserCast = cast<VPRecipeBase>(U);
426         if (UserCast->getNumDefinedValues() == 1 &&
427             UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) {
428           FoundUserCast = UserCast;
429           break;
430         }
431       }
432       FindMyCast = FoundUserCast->getVPSingleValue();
433     }
434     FindMyCast->replaceAllUsesWith(IV);
435   }
436 }
437 
438 void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
439   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
440   VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
441   for (VPUser *U : CanonicalIV->users()) {
442     WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
443     if (WidenNewIV)
444       break;
445   }
446 
447   if (!WidenNewIV)
448     return;
449 
450   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
451   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
452     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
453 
454     if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
455         WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
456       continue;
457 
458     // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
459     // everything WidenNewIV's users need. That is, WidenOriginalIV will
460     // generate a vector phi or all users of WidenNewIV demand the first lane
461     // only.
462     if (any_of(WidenOriginalIV->users(),
463                [WidenOriginalIV](VPUser *U) {
464                  return !U->usesScalars(WidenOriginalIV);
465                }) ||
466         vputils::onlyFirstLaneUsed(WidenNewIV)) {
467       WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
468       WidenNewIV->eraseFromParent();
469       return;
470     }
471   }
472 }
473 
474 void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
475   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
476       Plan.getEntry());
477 
478   for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
479     // The recipes in the block are processed in reverse order, to catch chains
480     // of dead recipes.
481     for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
482       if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) {
483             return V->getNumUsers() > 0;
484           }))
485         continue;
486       R.eraseFromParent();
487     }
488   }
489 }
490 
491 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
492   SmallVector<VPRecipeBase *> ToRemove;
493   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
494   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
495   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
496     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
497     if (!WideIV)
498       continue;
499     if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
500           return U->usesScalars(WideIV);
501         }))
502       continue;
503 
504     auto IP = HeaderVPBB->getFirstNonPhi();
505     VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
506     Type *ResultTy = WideIV->getPHINode()->getType();
507     if (Instruction *TruncI = WideIV->getTruncInst())
508       ResultTy = TruncI->getType();
509     const InductionDescriptor &ID = WideIV->getInductionDescriptor();
510     VPValue *Step = WideIV->getStepValue();
511     VPValue *BaseIV = CanonicalIV;
512     if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step,
513                                   ResultTy)) {
514       BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV,
515                                      Step, ResultTy);
516       HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
517     }
518 
519     VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
520     HeaderVPBB->insert(Steps, IP);
521 
522     // Update scalar users of IV to use Step instead. Use SetVector to ensure
523     // the list of users doesn't contain duplicates.
524     SetVector<VPUser *> Users(WideIV->user_begin(), WideIV->user_end());
525     for (VPUser *U : Users) {
526       if (HasOnlyVectorVFs && !U->usesScalars(WideIV))
527         continue;
528       for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) {
529         if (U->getOperand(I) != WideIV)
530           continue;
531         U->setOperand(I, Steps);
532       }
533     }
534   }
535 }
536 
537 void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) {
538   DenseMap<const SCEV *, VPValue *> SCEV2VPV;
539 
540   for (VPRecipeBase &R :
541        make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
542     auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
543     if (!ExpR)
544       continue;
545 
546     auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
547     if (I.second)
548       continue;
549     ExpR->replaceAllUsesWith(I.first->second);
550     ExpR->eraseFromParent();
551   }
552 }
553 
554 static bool canSimplifyBranchOnCond(VPInstruction *Term) {
555   VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
556   if (!Not || Not->getOpcode() != VPInstruction::Not)
557     return false;
558 
559   VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
560   return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
561 }
562 
563 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
564                                          unsigned BestUF,
565                                          PredicatedScalarEvolution &PSE) {
566   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
567   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
568   VPBasicBlock *ExitingVPBB =
569       Plan.getVectorLoopRegion()->getExitingBasicBlock();
570   auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
571   // Try to simplify the branch condition if TC <= VF * UF when preparing to
572   // execute the plan for the main vector loop. We only do this if the
573   // terminator is:
574   //  1. BranchOnCount, or
575   //  2. BranchOnCond where the input is Not(ActiveLaneMask).
576   if (!Term || (Term->getOpcode() != VPInstruction::BranchOnCount &&
577                 (Term->getOpcode() != VPInstruction::BranchOnCond ||
578                  !canSimplifyBranchOnCond(Term))))
579     return;
580 
581   Type *IdxTy =
582       Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
583   const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
584   ScalarEvolution &SE = *PSE.getSE();
585   const SCEV *C =
586       SE.getConstant(TripCount->getType(), BestVF.getKnownMinValue() * BestUF);
587   if (TripCount->isZero() ||
588       !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
589     return;
590 
591   LLVMContext &Ctx = SE.getContext();
592   auto *BOC = new VPInstruction(
593       VPInstruction::BranchOnCond,
594       {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))});
595   Term->eraseFromParent();
596   ExitingVPBB->appendRecipe(BOC);
597   Plan.setVF(BestVF);
598   Plan.setUF(BestUF);
599   // TODO: Further simplifications are possible
600   //      1. Replace inductions with constants.
601   //      2. Replace vector loop region with VPBasicBlock.
602 }
603 
604 #ifndef NDEBUG
605 static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
606   auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
607   if (Region && Region->isReplicator()) {
608     assert(Region->getNumSuccessors() == 1 &&
609            Region->getNumPredecessors() == 1 && "Expected SESE region!");
610     assert(R->getParent()->size() == 1 &&
611            "A recipe in an original replicator region must be the only "
612            "recipe in its block");
613     return Region;
614   }
615   return nullptr;
616 }
617 #endif
618 
619 static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
620                               VPDominatorTree &VPDT) {
621   if (A == B)
622     return false;
623 
624   auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
625     for (auto &R : *A->getParent()) {
626       if (&R == A)
627         return true;
628       if (&R == B)
629         return false;
630     }
631     llvm_unreachable("recipe not found");
632   };
633   const VPBlockBase *ParentA = A->getParent();
634   const VPBlockBase *ParentB = B->getParent();
635   if (ParentA == ParentB)
636     return LocalComesBefore(A, B);
637 
638   assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
639          "No replicate regions expected at this point");
640   assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
641          "No replicate regions expected at this point");
642   return VPDT.properlyDominates(ParentA, ParentB);
643 }
644 
645 /// Sink users of \p FOR after the recipe defining the previous value \p
646 /// Previous of the recurrence. \returns true if all users of \p FOR could be
647 /// re-arranged as needed or false if it is not possible.
648 static bool
649 sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
650                                  VPRecipeBase *Previous,
651                                  VPDominatorTree &VPDT) {
652   // Collect recipes that need sinking.
653   SmallVector<VPRecipeBase *> WorkList;
654   SmallPtrSet<VPRecipeBase *, 8> Seen;
655   Seen.insert(Previous);
656   auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
657     // The previous value must not depend on the users of the recurrence phi. In
658     // that case, FOR is not a fixed order recurrence.
659     if (SinkCandidate == Previous)
660       return false;
661 
662     if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
663         !Seen.insert(SinkCandidate).second ||
664         properlyDominates(Previous, SinkCandidate, VPDT))
665       return true;
666 
667     if (SinkCandidate->mayHaveSideEffects())
668       return false;
669 
670     WorkList.push_back(SinkCandidate);
671     return true;
672   };
673 
674   // Recursively sink users of FOR after Previous.
675   WorkList.push_back(FOR);
676   for (unsigned I = 0; I != WorkList.size(); ++I) {
677     VPRecipeBase *Current = WorkList[I];
678     assert(Current->getNumDefinedValues() == 1 &&
679            "only recipes with a single defined value expected");
680 
681     for (VPUser *User : Current->getVPSingleValue()->users()) {
682       if (auto *R = dyn_cast<VPRecipeBase>(User))
683         if (!TryToPushSinkCandidate(R))
684           return false;
685     }
686   }
687 
688   // Keep recipes to sink ordered by dominance so earlier instructions are
689   // processed first.
690   sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
691     return properlyDominates(A, B, VPDT);
692   });
693 
694   for (VPRecipeBase *SinkCandidate : WorkList) {
695     if (SinkCandidate == FOR)
696       continue;
697 
698     SinkCandidate->moveAfter(Previous);
699     Previous = SinkCandidate;
700   }
701   return true;
702 }
703 
704 bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
705                                                   VPBuilder &Builder) {
706   VPDominatorTree VPDT;
707   VPDT.recalculate(Plan);
708 
709   SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
710   for (VPRecipeBase &R :
711        Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
712     if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
713       RecurrencePhis.push_back(FOR);
714 
715   for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
716     SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
717     VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
718     // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
719     // to terminate.
720     while (auto *PrevPhi =
721                dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
722       assert(PrevPhi->getParent() == FOR->getParent());
723       assert(SeenPhis.insert(PrevPhi).second);
724       Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
725     }
726 
727     if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
728       return false;
729 
730     // Introduce a recipe to combine the incoming and previous values of a
731     // fixed-order recurrence.
732     VPBasicBlock *InsertBlock = Previous->getParent();
733     if (isa<VPHeaderPHIRecipe>(Previous))
734       Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
735     else
736       Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator()));
737 
738     auto *RecurSplice = cast<VPInstruction>(
739         Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
740                              {FOR, FOR->getBackedgeValue()}));
741 
742     FOR->replaceAllUsesWith(RecurSplice);
743     // Set the first operand of RecurSplice to FOR again, after replacing
744     // all users.
745     RecurSplice->setOperand(0, FOR);
746   }
747   return true;
748 }
749 
750 void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
751   for (VPRecipeBase &R :
752        Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
753     auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
754     if (!PhiR)
755       continue;
756     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
757     RecurKind RK = RdxDesc.getRecurrenceKind();
758     if (RK != RecurKind::Add && RK != RecurKind::Mul)
759       continue;
760 
761     SmallSetVector<VPValue *, 8> Worklist;
762     Worklist.insert(PhiR);
763 
764     for (unsigned I = 0; I != Worklist.size(); ++I) {
765       VPValue *Cur = Worklist[I];
766       if (auto *RecWithFlags =
767               dyn_cast<VPRecipeWithIRFlags>(Cur->getDefiningRecipe())) {
768         RecWithFlags->dropPoisonGeneratingFlags();
769       }
770 
771       for (VPUser *U : Cur->users()) {
772         auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
773         if (!UserRecipe)
774           continue;
775         for (VPValue *V : UserRecipe->definedValues())
776           Worklist.insert(V);
777       }
778     }
779   }
780 }
781