//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

extern cl::opt<bool> EnableVPlanNativePath;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

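// Add the incoming value for this live-out's exit phi: the last lane of the
// last unroll part of the exit value, or lane 0 if the value is uniform after
// vectorization.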
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (Plan.isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   State.Builder.GetInsertBlock());
}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

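// Generate the IR for unroll part \p Part of this VPInstruction at the
// builder's current insert point.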
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC, Name);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, Name);
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
                Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }

  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW: {
    bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0) {
      State.set(this, IV, Part);
      break;
    }

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to the backward destination for exiting blocks
    // now and to the forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to the backward destination (the header) now and
    // to the forward destination (the exit/middle block) later when it is
    // created. Note that CreateCondBr expects a valid BB as first argument,
    // so we need to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part + ";
    break;
  case VPInstruction::CanonicalIVIncrementForPartNUW:
    O << "VF * Part +(nuw) ";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (InvariantCond ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  auto &I = *cast<SelectInst>(getUnderlyingInstr());
  State.setDebugLocFromInst(&I);

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, &I);
  }
}

void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        VecOp->copyIRFlags(&I);

        // If the instruction is vectorized and was in a basic block that
        // needed predication, we can't propagate poison-generating flags
        // (nuw/nsw, exact, etc.). The control flow has been linearized and the
        // instruction is no longer guarded by the predicate, so the flag
        // properties may no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }

    break;
  }
  case Instruction::Freeze: {
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }

    break;
  }

  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);

    // Vectorize casts.
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
  printOperands(O, SlotTracker);
}

void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\"  ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

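// An int or FP induction is canonical if it starts at 0 and steps by 1.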
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
  return StartC && StartC->isZero() && StepC && StepC->isOne();
}

VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
  return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}

bool VPScalarIVStepsRecipe::isCanonical() const {
  auto *CanIV = getCanonicalIV();
  // The start value of the steps-recipe must match the start value of the
  // canonical induction and it must step by 1.
  if (CanIV->getStartValue() != getStartValue())
    return false;
  auto *StepVPV = getStepValue();
  if (StepVPV->getDef())
    return false;
  auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
  return StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << Indent << "= SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenGEPRecipe::execute(VPTransformState &State) {
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
    auto *Clone = State.Builder.Insert(GEP->clone());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = IsPtrLoopInvariant
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (IsIndexLoopInvariant[I - 1])
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // If the GEP instruction is vectorized and was in a basic block that
      // needed predication, we can't propagate the poison-generating
      // 'inbounds' flag. The control flow has been linearized and the GEP is
      // no longer guarded by the predicate, so the 'inbounds' property may no
      // longer hold.
      bool IsInBounds =
          GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", IsInBounds);
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (IsPtrLoopInvariant ? "Inv" : "Var");
  size_t IndicesNumber = IsIndexLoopInvariant.size();
  for (size_t I = 0; I < IndicesNumber; ++I)
    O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr ";
  printOperands(O, SlotTracker);
}
#endif

void VPBlendRecipe::execute(VPTransformState &State) {
  State.setDebugLocFromInst(Phi);
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi,
  // and which are essentially undef, are taken from In0.
  VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}

void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}

void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    O << "call @" << CB->getCalledFunction()->getName() << "(";
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
    printOperands(O, SlotTracker);
  }

  if (AlsoPack)
    O << " (S->V)";
}
#endif

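// Create the guarding branch for a replicate region: branch on the current
// lane's bit of the block-in mask, or on 'true' when there is no mask.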
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane.getKnownLane();

  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask) {
    ConditionBit = State.get(BlockInMask, Part);
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  } else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional branch,
  // whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
#endif

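// Create the scalar phi for the canonical induction in the vector loop
// header; all unroll parts map to the same phi, which starts at the start
// value in the preheader.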
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  return IsScalarAfterVectorization &&
         (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

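// Expand the wrapped SCEV expression to IR at the builder's current insert
// point; the same expanded value is reused for all unroll parts.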
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

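// Create the header phi for a first-order recurrence. For vector VFs the
// initial value is a vector built in the preheader that is poison except for
// its last lane, which holds the scalar initial value.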
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(DL);
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif