1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/ValueTracking.h"
20 #include "llvm/Analysis/VectorUtils.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 #include "llvm/IR/PatternMatch.h"
23 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
24
25 using namespace llvm;
26 using namespace PatternMatch;
27
28 #define LV_NAME "loop-vectorize"
29 #define DEBUG_TYPE LV_NAME
30
31 extern cl::opt<bool> EnableVPlanPredication;
32
33 static cl::opt<bool>
34 EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
35 cl::desc("Enable if-conversion during vectorization."));
36
37 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
38 "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
39 cl::desc("The maximum allowed number of runtime memory checks with a "
40 "vectorize(enable) pragma."));
41
42 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
43 "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
44 cl::desc("The maximum number of SCEV checks allowed."));
45
46 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
47 "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
48 cl::desc("The maximum number of SCEV checks allowed with a "
49 "vectorize(enable) pragma"));
50
51 /// Maximum vectorization interleave count.
52 static const unsigned MaxInterleaveFactor = 16;
53
54 namespace llvm {
55
validate(unsigned Val)56 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
57 switch (Kind) {
58 case HK_WIDTH:
59 return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
60 case HK_UNROLL:
61 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
62 case HK_FORCE:
63 return (Val <= 1);
64 case HK_ISVECTORIZED:
65 case HK_PREDICATE:
66 return (Val == 0 || Val == 1);
67 }
68 return false;
69 }
70
LoopVectorizeHints(const Loop * L,bool InterleaveOnlyWhenForced,OptimizationRemarkEmitter & ORE)71 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
72 bool InterleaveOnlyWhenForced,
73 OptimizationRemarkEmitter &ORE)
74 : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
75 Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
76 Force("vectorize.enable", FK_Undefined, HK_FORCE),
77 IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
78 Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
79 ORE(ORE) {
80 // Populate values with existing loop metadata.
81 getHintsFromMetadata();
82
83 // force-vector-interleave overrides DisableInterleaving.
84 if (VectorizerParams::isInterleaveForced())
85 Interleave.Value = VectorizerParams::VectorizationInterleave;
86
87 if (IsVectorized.Value != 1)
88 // If the vectorization width and interleaving count are both 1 then
89 // consider the loop to have been already vectorized because there's
90 // nothing more that we can do.
91 IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
92 LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
93 << "LV: Interleaving disabled by the pass manager\n");
94 }
95
setAlreadyVectorized()96 void LoopVectorizeHints::setAlreadyVectorized() {
97 LLVMContext &Context = TheLoop->getHeader()->getContext();
98
99 MDNode *IsVectorizedMD = MDNode::get(
100 Context,
101 {MDString::get(Context, "llvm.loop.isvectorized"),
102 ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
103 MDNode *LoopID = TheLoop->getLoopID();
104 MDNode *NewLoopID =
105 makePostTransformationMetadata(Context, LoopID,
106 {Twine(Prefix(), "vectorize.").str(),
107 Twine(Prefix(), "interleave.").str()},
108 {IsVectorizedMD});
109 TheLoop->setLoopID(NewLoopID);
110
111 // Update internal cache.
112 IsVectorized.Value = 1;
113 }
114
allowVectorization(Function * F,Loop * L,bool VectorizeOnlyWhenForced) const115 bool LoopVectorizeHints::allowVectorization(
116 Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
117 if (getForce() == LoopVectorizeHints::FK_Disabled) {
118 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
119 emitRemarkWithHints();
120 return false;
121 }
122
123 if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
124 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
125 emitRemarkWithHints();
126 return false;
127 }
128
129 if (getIsVectorized() == 1) {
130 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
131 // FIXME: Add interleave.disable metadata. This will allow
132 // vectorize.disable to be used without disabling the pass and errors
133 // to differentiate between disabled vectorization and a width of 1.
134 ORE.emit([&]() {
135 return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
136 "AllDisabled", L->getStartLoc(),
137 L->getHeader())
138 << "loop not vectorized: vectorization and interleaving are "
139 "explicitly disabled, or the loop has already been "
140 "vectorized";
141 });
142 return false;
143 }
144
145 return true;
146 }
147
emitRemarkWithHints() const148 void LoopVectorizeHints::emitRemarkWithHints() const {
149 using namespace ore;
150
151 ORE.emit([&]() {
152 if (Force.Value == LoopVectorizeHints::FK_Disabled)
153 return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
154 TheLoop->getStartLoc(),
155 TheLoop->getHeader())
156 << "loop not vectorized: vectorization is explicitly disabled";
157 else {
158 OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
159 TheLoop->getStartLoc(), TheLoop->getHeader());
160 R << "loop not vectorized";
161 if (Force.Value == LoopVectorizeHints::FK_Enabled) {
162 R << " (Force=" << NV("Force", true);
163 if (Width.Value != 0)
164 R << ", Vector Width=" << NV("VectorWidth", Width.Value);
165 if (Interleave.Value != 0)
166 R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
167 R << ")";
168 }
169 return R;
170 }
171 });
172 }
173
vectorizeAnalysisPassName() const174 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
175 if (getWidth() == 1)
176 return LV_NAME;
177 if (getForce() == LoopVectorizeHints::FK_Disabled)
178 return LV_NAME;
179 if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
180 return LV_NAME;
181 return OptimizationRemarkAnalysis::AlwaysPrint;
182 }
183
getHintsFromMetadata()184 void LoopVectorizeHints::getHintsFromMetadata() {
185 MDNode *LoopID = TheLoop->getLoopID();
186 if (!LoopID)
187 return;
188
189 // First operand should refer to the loop id itself.
190 assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
191 assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
192
193 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
194 const MDString *S = nullptr;
195 SmallVector<Metadata *, 4> Args;
196
197 // The expected hint is either a MDString or a MDNode with the first
198 // operand a MDString.
199 if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
200 if (!MD || MD->getNumOperands() == 0)
201 continue;
202 S = dyn_cast<MDString>(MD->getOperand(0));
203 for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
204 Args.push_back(MD->getOperand(i));
205 } else {
206 S = dyn_cast<MDString>(LoopID->getOperand(i));
207 assert(Args.size() == 0 && "too many arguments for MDString");
208 }
209
210 if (!S)
211 continue;
212
213 // Check if the hint starts with the loop metadata prefix.
214 StringRef Name = S->getString();
215 if (Args.size() == 1)
216 setHint(Name, Args[0]);
217 }
218 }
219
setHint(StringRef Name,Metadata * Arg)220 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
221 if (!Name.startswith(Prefix()))
222 return;
223 Name = Name.substr(Prefix().size(), StringRef::npos);
224
225 const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
226 if (!C)
227 return;
228 unsigned Val = C->getZExtValue();
229
230 Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
231 for (auto H : Hints) {
232 if (Name == H->Name) {
233 if (H->validate(Val))
234 H->Value = Val;
235 else
236 LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
237 break;
238 }
239 }
240 }
241
doesNotMeet(Function * F,Loop * L,const LoopVectorizeHints & Hints)242 bool LoopVectorizationRequirements::doesNotMeet(
243 Function *F, Loop *L, const LoopVectorizeHints &Hints) {
244 const char *PassName = Hints.vectorizeAnalysisPassName();
245 bool Failed = false;
246 if (UnsafeAlgebraInst && !Hints.allowReordering()) {
247 ORE.emit([&]() {
248 return OptimizationRemarkAnalysisFPCommute(
249 PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
250 UnsafeAlgebraInst->getParent())
251 << "loop not vectorized: cannot prove it is safe to reorder "
252 "floating-point operations";
253 });
254 Failed = true;
255 }
256
257 // Test if runtime memcheck thresholds are exceeded.
258 bool PragmaThresholdReached =
259 NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
260 bool ThresholdReached =
261 NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
262 if ((ThresholdReached && !Hints.allowReordering()) ||
263 PragmaThresholdReached) {
264 ORE.emit([&]() {
265 return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
266 L->getStartLoc(),
267 L->getHeader())
268 << "loop not vectorized: cannot prove it is safe to reorder "
269 "memory operations";
270 });
271 LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
272 Failed = true;
273 }
274
275 return Failed;
276 }
277
278 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
279 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
280 // executing the inner loop will execute the same iterations). This check is
281 // very constrained for now but it will be relaxed in the future. \p Lp is
282 // considered uniform if it meets all the following conditions:
283 // 1) it has a canonical IV (starting from 0 and with stride 1),
284 // 2) its latch terminator is a conditional branch and,
285 // 3) its latch condition is a compare instruction whose operands are the
286 // canonical IV and an OuterLp invariant.
287 // This check doesn't take into account the uniformity of other conditions not
288 // related to the loop latch because they don't affect the loop uniformity.
289 //
290 // NOTE: We decided to keep all these checks and its associated documentation
291 // together so that we can easily have a picture of the current supported loop
292 // nests. However, some of the current checks don't depend on \p OuterLp and
293 // would be redundantly executed for each \p Lp if we invoked this function for
294 // different candidate outer loops. This is not the case for now because we
295 // don't currently have the infrastructure to evaluate multiple candidate outer
296 // loops and \p OuterLp will be a fixed parameter while we only support explicit
297 // outer loop vectorization. It's also very likely that these checks go away
298 // before introducing the aforementioned infrastructure. However, if this is not
299 // the case, we should move the \p OuterLp independent checks to a separate
300 // function that is only executed once for each \p Lp.
isUniformLoop(Loop * Lp,Loop * OuterLp)301 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
302 assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
303
304 // If Lp is the outer loop, it's uniform by definition.
305 if (Lp == OuterLp)
306 return true;
307 assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
308
309 // 1.
310 PHINode *IV = Lp->getCanonicalInductionVariable();
311 if (!IV) {
312 LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
313 return false;
314 }
315
316 // 2.
317 BasicBlock *Latch = Lp->getLoopLatch();
318 auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
319 if (!LatchBr || LatchBr->isUnconditional()) {
320 LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
321 return false;
322 }
323
324 // 3.
325 auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
326 if (!LatchCmp) {
327 LLVM_DEBUG(
328 dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
329 return false;
330 }
331
332 Value *CondOp0 = LatchCmp->getOperand(0);
333 Value *CondOp1 = LatchCmp->getOperand(1);
334 Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
335 if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
336 !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
337 LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
338 return false;
339 }
340
341 return true;
342 }
343
344 // Return true if \p Lp and all its nested loops are uniform with regard to \p
345 // OuterLp.
isUniformLoopNest(Loop * Lp,Loop * OuterLp)346 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
347 if (!isUniformLoop(Lp, OuterLp))
348 return false;
349
350 // Check if nested loops are uniform.
351 for (Loop *SubLp : *Lp)
352 if (!isUniformLoopNest(SubLp, OuterLp))
353 return false;
354
355 return true;
356 }
357
358 /// Check whether it is safe to if-convert this phi node.
359 ///
360 /// Phi nodes with constant expressions that can trap are not safe to if
361 /// convert.
canIfConvertPHINodes(BasicBlock * BB)362 static bool canIfConvertPHINodes(BasicBlock *BB) {
363 for (PHINode &Phi : BB->phis()) {
364 for (Value *V : Phi.incoming_values())
365 if (auto *C = dyn_cast<Constant>(V))
366 if (C->canTrap())
367 return false;
368 }
369 return true;
370 }
371
convertPointerToIntegerType(const DataLayout & DL,Type * Ty)372 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
373 if (Ty->isPointerTy())
374 return DL.getIntPtrType(Ty);
375
376 // It is possible that char's or short's overflow when we ask for the loop's
377 // trip count, work around this by changing the type size.
378 if (Ty->getScalarSizeInBits() < 32)
379 return Type::getInt32Ty(Ty->getContext());
380
381 return Ty;
382 }
383
getWiderType(const DataLayout & DL,Type * Ty0,Type * Ty1)384 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
385 Ty0 = convertPointerToIntegerType(DL, Ty0);
386 Ty1 = convertPointerToIntegerType(DL, Ty1);
387 if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
388 return Ty0;
389 return Ty1;
390 }
391
392 /// Check that the instruction has outside loop users and is not an
393 /// identified reduction variable.
hasOutsideLoopUser(const Loop * TheLoop,Instruction * Inst,SmallPtrSetImpl<Value * > & AllowedExit)394 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
395 SmallPtrSetImpl<Value *> &AllowedExit) {
396 // Reductions, Inductions and non-header phis are allowed to have exit users. All
397 // other instructions must not have external users.
398 if (!AllowedExit.count(Inst))
399 // Check that all of the users of the loop are inside the BB.
400 for (User *U : Inst->users()) {
401 Instruction *UI = cast<Instruction>(U);
402 // This user may be a reduction exit value.
403 if (!TheLoop->contains(UI)) {
404 LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
405 return true;
406 }
407 }
408 return false;
409 }
410
isConsecutivePtr(Value * Ptr)411 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
412 const ValueToValueMap &Strides =
413 getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
414
415 bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
416 int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
417 if (Stride == 1 || Stride == -1)
418 return Stride;
419 return 0;
420 }
421
isUniform(Value * V)422 bool LoopVectorizationLegality::isUniform(Value *V) {
423 return LAI->isUniform(V);
424 }
425
canVectorizeOuterLoop()426 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
427 assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
428 // Store the result and return it at the end instead of exiting early, in case
429 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
430 bool Result = true;
431 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
432
433 for (BasicBlock *BB : TheLoop->blocks()) {
434 // Check whether the BB terminator is a BranchInst. Any other terminator is
435 // not supported yet.
436 auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
437 if (!Br) {
438 reportVectorizationFailure("Unsupported basic block terminator",
439 "loop control flow is not understood by vectorizer",
440 "CFGNotUnderstood", ORE, TheLoop);
441 if (DoExtraAnalysis)
442 Result = false;
443 else
444 return false;
445 }
446
447 // Check whether the BranchInst is a supported one. Only unconditional
448 // branches, conditional branches with an outer loop invariant condition or
449 // backedges are supported.
450 // FIXME: We skip these checks when VPlan predication is enabled as we
451 // want to allow divergent branches. This whole check will be removed
452 // once VPlan predication is on by default.
453 if (!EnableVPlanPredication && Br && Br->isConditional() &&
454 !TheLoop->isLoopInvariant(Br->getCondition()) &&
455 !LI->isLoopHeader(Br->getSuccessor(0)) &&
456 !LI->isLoopHeader(Br->getSuccessor(1))) {
457 reportVectorizationFailure("Unsupported conditional branch",
458 "loop control flow is not understood by vectorizer",
459 "CFGNotUnderstood", ORE, TheLoop);
460 if (DoExtraAnalysis)
461 Result = false;
462 else
463 return false;
464 }
465 }
466
467 // Check whether inner loops are uniform. At this point, we only support
468 // simple outer loops scenarios with uniform nested loops.
469 if (!isUniformLoopNest(TheLoop /*loop nest*/,
470 TheLoop /*context outer loop*/)) {
471 reportVectorizationFailure("Outer loop contains divergent loops",
472 "loop control flow is not understood by vectorizer",
473 "CFGNotUnderstood", ORE, TheLoop);
474 if (DoExtraAnalysis)
475 Result = false;
476 else
477 return false;
478 }
479
480 // Check whether we are able to set up outer loop induction.
481 if (!setupOuterLoopInductions()) {
482 reportVectorizationFailure("Unsupported outer loop Phi(s)",
483 "Unsupported outer loop Phi(s)",
484 "UnsupportedPhi", ORE, TheLoop);
485 if (DoExtraAnalysis)
486 Result = false;
487 else
488 return false;
489 }
490
491 return Result;
492 }
493
addInductionPhi(PHINode * Phi,const InductionDescriptor & ID,SmallPtrSetImpl<Value * > & AllowedExit)494 void LoopVectorizationLegality::addInductionPhi(
495 PHINode *Phi, const InductionDescriptor &ID,
496 SmallPtrSetImpl<Value *> &AllowedExit) {
497 Inductions[Phi] = ID;
498
499 // In case this induction also comes with casts that we know we can ignore
500 // in the vectorized loop body, record them here. All casts could be recorded
501 // here for ignoring, but suffices to record only the first (as it is the
502 // only one that may bw used outside the cast sequence).
503 const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
504 if (!Casts.empty())
505 InductionCastsToIgnore.insert(*Casts.begin());
506
507 Type *PhiTy = Phi->getType();
508 const DataLayout &DL = Phi->getModule()->getDataLayout();
509
510 // Get the widest type.
511 if (!PhiTy->isFloatingPointTy()) {
512 if (!WidestIndTy)
513 WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
514 else
515 WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
516 }
517
518 // Int inductions are special because we only allow one IV.
519 if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
520 ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
521 isa<Constant>(ID.getStartValue()) &&
522 cast<Constant>(ID.getStartValue())->isNullValue()) {
523
524 // Use the phi node with the widest type as induction. Use the last
525 // one if there are multiple (no good reason for doing this other
526 // than it is expedient). We've checked that it begins at zero and
527 // steps by one, so this is a canonical induction variable.
528 if (!PrimaryInduction || PhiTy == WidestIndTy)
529 PrimaryInduction = Phi;
530 }
531
532 // Both the PHI node itself, and the "post-increment" value feeding
533 // back into the PHI node may have external users.
534 // We can allow those uses, except if the SCEVs we have for them rely
535 // on predicates that only hold within the loop, since allowing the exit
536 // currently means re-using this SCEV outside the loop (see PR33706 for more
537 // details).
538 if (PSE.getUnionPredicate().isAlwaysTrue()) {
539 AllowedExit.insert(Phi);
540 AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
541 }
542
543 LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
544 }
545
setupOuterLoopInductions()546 bool LoopVectorizationLegality::setupOuterLoopInductions() {
547 BasicBlock *Header = TheLoop->getHeader();
548
549 // Returns true if a given Phi is a supported induction.
550 auto isSupportedPhi = [&](PHINode &Phi) -> bool {
551 InductionDescriptor ID;
552 if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
553 ID.getKind() == InductionDescriptor::IK_IntInduction) {
554 addInductionPhi(&Phi, ID, AllowedExit);
555 return true;
556 } else {
557 // Bail out for any Phi in the outer loop header that is not a supported
558 // induction.
559 LLVM_DEBUG(
560 dbgs()
561 << "LV: Found unsupported PHI for outer loop vectorization.\n");
562 return false;
563 }
564 };
565
566 if (llvm::all_of(Header->phis(), isSupportedPhi))
567 return true;
568 else
569 return false;
570 }
571
572 /// Checks if a function is scalarizable according to the TLI, in
573 /// the sense that it should be vectorized and then expanded in
574 /// multiple scalarcalls. This is represented in the
575 /// TLI via mappings that do not specify a vector name, as in the
576 /// following example:
577 ///
578 /// const VecDesc VecIntrinsics[] = {
579 /// {"llvm.phx.abs.i32", "", 4}
580 /// };
isTLIScalarize(const TargetLibraryInfo & TLI,const CallInst & CI)581 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
582 const StringRef ScalarName = CI.getCalledFunction()->getName();
583 bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
584 // Check that all known VFs are not associated to a vector
585 // function, i.e. the vector name is emty.
586 if (Scalarize)
587 for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
588 VF <= WidestVF; VF *= 2) {
589 Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
590 }
591 return Scalarize;
592 }
593
canVectorizeInstrs()594 bool LoopVectorizationLegality::canVectorizeInstrs() {
595 BasicBlock *Header = TheLoop->getHeader();
596
597 // Look for the attribute signaling the absence of NaNs.
598 Function &F = *Header->getParent();
599 HasFunNoNaNAttr =
600 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
601
602 // For each block in the loop.
603 for (BasicBlock *BB : TheLoop->blocks()) {
604 // Scan the instructions in the block and look for hazards.
605 for (Instruction &I : *BB) {
606 if (auto *Phi = dyn_cast<PHINode>(&I)) {
607 Type *PhiTy = Phi->getType();
608 // Check that this PHI type is allowed.
609 if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
610 !PhiTy->isPointerTy()) {
611 reportVectorizationFailure("Found a non-int non-pointer PHI",
612 "loop control flow is not understood by vectorizer",
613 "CFGNotUnderstood", ORE, TheLoop);
614 return false;
615 }
616
617 // If this PHINode is not in the header block, then we know that we
618 // can convert it to select during if-conversion. No need to check if
619 // the PHIs in this block are induction or reduction variables.
620 if (BB != Header) {
621 // Non-header phi nodes that have outside uses can be vectorized. Add
622 // them to the list of allowed exits.
623 // Unsafe cyclic dependencies with header phis are identified during
624 // legalization for reduction, induction and first order
625 // recurrences.
626 AllowedExit.insert(&I);
627 continue;
628 }
629
630 // We only allow if-converted PHIs with exactly two incoming values.
631 if (Phi->getNumIncomingValues() != 2) {
632 reportVectorizationFailure("Found an invalid PHI",
633 "loop control flow is not understood by vectorizer",
634 "CFGNotUnderstood", ORE, TheLoop, Phi);
635 return false;
636 }
637
638 RecurrenceDescriptor RedDes;
639 if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
640 DT)) {
641 if (RedDes.hasUnsafeAlgebra())
642 Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
643 AllowedExit.insert(RedDes.getLoopExitInstr());
644 Reductions[Phi] = RedDes;
645 continue;
646 }
647
648 // TODO: Instead of recording the AllowedExit, it would be good to record the
649 // complementary set: NotAllowedExit. These include (but may not be
650 // limited to):
651 // 1. Reduction phis as they represent the one-before-last value, which
652 // is not available when vectorized
653 // 2. Induction phis and increment when SCEV predicates cannot be used
654 // outside the loop - see addInductionPhi
655 // 3. Non-Phis with outside uses when SCEV predicates cannot be used
656 // outside the loop - see call to hasOutsideLoopUser in the non-phi
657 // handling below
658 // 4. FirstOrderRecurrence phis that can possibly be handled by
659 // extraction.
660 // By recording these, we can then reason about ways to vectorize each
661 // of these NotAllowedExit.
662 InductionDescriptor ID;
663 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
664 addInductionPhi(Phi, ID, AllowedExit);
665 if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
666 Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
667 continue;
668 }
669
670 if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
671 SinkAfter, DT)) {
672 AllowedExit.insert(Phi);
673 FirstOrderRecurrences.insert(Phi);
674 continue;
675 }
676
677 // As a last resort, coerce the PHI to a AddRec expression
678 // and re-try classifying it a an induction PHI.
679 if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
680 addInductionPhi(Phi, ID, AllowedExit);
681 continue;
682 }
683
684 reportVectorizationFailure("Found an unidentified PHI",
685 "value that could not be identified as "
686 "reduction is used outside the loop",
687 "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
688 return false;
689 } // end of PHI handling
690
691 // We handle calls that:
692 // * Are debug info intrinsics.
693 // * Have a mapping to an IR intrinsic.
694 // * Have a vector version available.
695 auto *CI = dyn_cast<CallInst>(&I);
696
697 if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
698 !isa<DbgInfoIntrinsic>(CI) &&
699 !(CI->getCalledFunction() && TLI &&
700 (!VFDatabase::getMappings(*CI).empty() ||
701 isTLIScalarize(*TLI, *CI)))) {
702 // If the call is a recognized math libary call, it is likely that
703 // we can vectorize it given loosened floating-point constraints.
704 LibFunc Func;
705 bool IsMathLibCall =
706 TLI && CI->getCalledFunction() &&
707 CI->getType()->isFloatingPointTy() &&
708 TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
709 TLI->hasOptimizedCodeGen(Func);
710
711 if (IsMathLibCall) {
712 // TODO: Ideally, we should not use clang-specific language here,
713 // but it's hard to provide meaningful yet generic advice.
714 // Also, should this be guarded by allowExtraAnalysis() and/or be part
715 // of the returned info from isFunctionVectorizable()?
716 reportVectorizationFailure(
717 "Found a non-intrinsic callsite",
718 "library call cannot be vectorized. "
719 "Try compiling with -fno-math-errno, -ffast-math, "
720 "or similar flags",
721 "CantVectorizeLibcall", ORE, TheLoop, CI);
722 } else {
723 reportVectorizationFailure("Found a non-intrinsic callsite",
724 "call instruction cannot be vectorized",
725 "CantVectorizeLibcall", ORE, TheLoop, CI);
726 }
727 return false;
728 }
729
730 // Some intrinsics have scalar arguments and should be same in order for
731 // them to be vectorized (i.e. loop invariant).
732 if (CI) {
733 auto *SE = PSE.getSE();
734 Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
735 for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
736 if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
737 if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
738 reportVectorizationFailure("Found unvectorizable intrinsic",
739 "intrinsic instruction cannot be vectorized",
740 "CantVectorizeIntrinsic", ORE, TheLoop, CI);
741 return false;
742 }
743 }
744 }
745
746 // Check that the instruction return type is vectorizable.
747 // Also, we can't vectorize extractelement instructions.
748 if ((!VectorType::isValidElementType(I.getType()) &&
749 !I.getType()->isVoidTy()) ||
750 isa<ExtractElementInst>(I)) {
751 reportVectorizationFailure("Found unvectorizable type",
752 "instruction return type cannot be vectorized",
753 "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
754 return false;
755 }
756
757 // Check that the stored type is vectorizable.
758 if (auto *ST = dyn_cast<StoreInst>(&I)) {
759 Type *T = ST->getValueOperand()->getType();
760 if (!VectorType::isValidElementType(T)) {
761 reportVectorizationFailure("Store instruction cannot be vectorized",
762 "store instruction cannot be vectorized",
763 "CantVectorizeStore", ORE, TheLoop, ST);
764 return false;
765 }
766
767 // For nontemporal stores, check that a nontemporal vector version is
768 // supported on the target.
769 if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
770 // Arbitrarily try a vector of 2 elements.
771 auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
772 assert(VecTy && "did not find vectorized version of stored type");
773 if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
774 reportVectorizationFailure(
775 "nontemporal store instruction cannot be vectorized",
776 "nontemporal store instruction cannot be vectorized",
777 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
778 return false;
779 }
780 }
781
782 } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
783 if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
784 // For nontemporal loads, check that a nontemporal vector version is
785 // supported on the target (arbitrarily try a vector of 2 elements).
786 auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
787 assert(VecTy && "did not find vectorized version of load type");
788 if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
789 reportVectorizationFailure(
790 "nontemporal load instruction cannot be vectorized",
791 "nontemporal load instruction cannot be vectorized",
792 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
793 return false;
794 }
795 }
796
797 // FP instructions can allow unsafe algebra, thus vectorizable by
798 // non-IEEE-754 compliant SIMD units.
799 // This applies to floating-point math operations and calls, not memory
800 // operations, shuffles, or casts, as they don't change precision or
801 // semantics.
802 } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
803 !I.isFast()) {
804 LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
805 Hints->setPotentiallyUnsafe();
806 }
807
808 // Reduction instructions are allowed to have exit users.
809 // All other instructions must not have external users.
810 if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
811 // We can safely vectorize loops where instructions within the loop are
812 // used outside the loop only if the SCEV predicates within the loop is
813 // same as outside the loop. Allowing the exit means reusing the SCEV
814 // outside the loop.
815 if (PSE.getUnionPredicate().isAlwaysTrue()) {
816 AllowedExit.insert(&I);
817 continue;
818 }
819 reportVectorizationFailure("Value cannot be used outside the loop",
820 "value cannot be used outside the loop",
821 "ValueUsedOutsideLoop", ORE, TheLoop, &I);
822 return false;
823 }
824 } // next instr.
825 }
826
827 if (!PrimaryInduction) {
828 if (Inductions.empty()) {
829 reportVectorizationFailure("Did not find one integer induction var",
830 "loop induction variable could not be identified",
831 "NoInductionVariable", ORE, TheLoop);
832 return false;
833 } else if (!WidestIndTy) {
834 reportVectorizationFailure("Did not find one integer induction var",
835 "integer loop induction variable could not be identified",
836 "NoIntegerInductionVariable", ORE, TheLoop);
837 return false;
838 } else {
839 LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
840 }
841 }
842
843 // For first order recurrences, we use the previous value (incoming value from
844 // the latch) to check if it dominates all users of the recurrence. Bail out
845 // if we have to sink such an instruction for another recurrence, as the
846 // dominance requirement may not hold after sinking.
847 BasicBlock *LoopLatch = TheLoop->getLoopLatch();
848 if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
849 Instruction *V =
850 cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
851 return SinkAfter.find(V) != SinkAfter.end();
852 }))
853 return false;
854
855 // Now we know the widest induction type, check if our found induction
856 // is the same size. If it's not, unset it here and InnerLoopVectorizer
857 // will create another.
858 if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
859 PrimaryInduction = nullptr;
860
861 return true;
862 }
863
canVectorizeMemory()864 bool LoopVectorizationLegality::canVectorizeMemory() {
865 LAI = &(*GetLAA)(*TheLoop);
866 const OptimizationRemarkAnalysis *LAR = LAI->getReport();
867 if (LAR) {
868 ORE->emit([&]() {
869 return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
870 "loop not vectorized: ", *LAR);
871 });
872 }
873 if (!LAI->canVectorizeMemory())
874 return false;
875
876 if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
877 reportVectorizationFailure("Stores to a uniform address",
878 "write to a loop invariant address could not be vectorized",
879 "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
880 return false;
881 }
882 Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
883 PSE.addPredicate(LAI->getPSE().getUnionPredicate());
884
885 return true;
886 }
887
isInductionPhi(const Value * V)888 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
889 Value *In0 = const_cast<Value *>(V);
890 PHINode *PN = dyn_cast_or_null<PHINode>(In0);
891 if (!PN)
892 return false;
893
894 return Inductions.count(PN);
895 }
896
isCastedInductionVariable(const Value * V)897 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
898 auto *Inst = dyn_cast<Instruction>(V);
899 return (Inst && InductionCastsToIgnore.count(Inst));
900 }
901
isInductionVariable(const Value * V)902 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
903 return isInductionPhi(V) || isCastedInductionVariable(V);
904 }
905
isFirstOrderRecurrence(const PHINode * Phi)906 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
907 return FirstOrderRecurrences.count(Phi);
908 }
909
blockNeedsPredication(BasicBlock * BB)910 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
911 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
912 }
913
blockCanBePredicated(BasicBlock * BB,SmallPtrSetImpl<Value * > & SafePtrs,bool PreserveGuards)914 bool LoopVectorizationLegality::blockCanBePredicated(
915 BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
916 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
917
918 for (Instruction &I : *BB) {
919 // Check that we don't have a constant expression that can trap as operand.
920 for (Value *Operand : I.operands()) {
921 if (auto *C = dyn_cast<Constant>(Operand))
922 if (C->canTrap())
923 return false;
924 }
925
926 // We can predicate blocks with calls to assume, as long as we drop them in
927 // case we flatten the CFG via predication.
928 if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
929 ConditionalAssumes.insert(&I);
930 continue;
931 }
932
933 // We might be able to hoist the load.
934 if (I.mayReadFromMemory()) {
935 auto *LI = dyn_cast<LoadInst>(&I);
936 if (!LI)
937 return false;
938 if (!SafePtrs.count(LI->getPointerOperand())) {
939 // !llvm.mem.parallel_loop_access implies if-conversion safety.
940 // Otherwise, record that the load needs (real or emulated) masking
941 // and let the cost model decide.
942 if (!IsAnnotatedParallel || PreserveGuards)
943 MaskedOp.insert(LI);
944 continue;
945 }
946 }
947
948 if (I.mayWriteToMemory()) {
949 auto *SI = dyn_cast<StoreInst>(&I);
950 if (!SI)
951 return false;
952 // Predicated store requires some form of masking:
953 // 1) masked store HW instruction,
954 // 2) emulation via load-blend-store (only if safe and legal to do so,
955 // be aware on the race conditions), or
956 // 3) element-by-element predicate check and scalar store.
957 MaskedOp.insert(SI);
958 continue;
959 }
960 if (I.mayThrow())
961 return false;
962 }
963
964 return true;
965 }
966
canVectorizeWithIfConvert()967 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
968 if (!EnableIfConversion) {
969 reportVectorizationFailure("If-conversion is disabled",
970 "if-conversion is disabled",
971 "IfConversionDisabled",
972 ORE, TheLoop);
973 return false;
974 }
975
976 assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
977
978 // A list of pointers which are known to be dereferenceable within scope of
979 // the loop body for each iteration of the loop which executes. That is,
980 // the memory pointed to can be dereferenced (with the access size implied by
981 // the value's type) unconditionally within the loop header without
982 // introducing a new fault.
983 SmallPtrSet<Value *, 8> SafePointers;
984
985 // Collect safe addresses.
986 for (BasicBlock *BB : TheLoop->blocks()) {
987 if (!blockNeedsPredication(BB)) {
988 for (Instruction &I : *BB)
989 if (auto *Ptr = getLoadStorePointerOperand(&I))
990 SafePointers.insert(Ptr);
991 continue;
992 }
993
994 // For a block which requires predication, a address may be safe to access
995 // in the loop w/o predication if we can prove dereferenceability facts
996 // sufficient to ensure it'll never fault within the loop. For the moment,
997 // we restrict this to loads; stores are more complicated due to
998 // concurrency restrictions.
999 ScalarEvolution &SE = *PSE.getSE();
1000 for (Instruction &I : *BB) {
1001 LoadInst *LI = dyn_cast<LoadInst>(&I);
1002 if (LI && !mustSuppressSpeculation(*LI) &&
1003 isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1004 SafePointers.insert(LI->getPointerOperand());
1005 }
1006 }
1007
1008 // Collect the blocks that need predication.
1009 BasicBlock *Header = TheLoop->getHeader();
1010 for (BasicBlock *BB : TheLoop->blocks()) {
1011 // We don't support switch statements inside loops.
1012 if (!isa<BranchInst>(BB->getTerminator())) {
1013 reportVectorizationFailure("Loop contains a switch statement",
1014 "loop contains a switch statement",
1015 "LoopContainsSwitch", ORE, TheLoop,
1016 BB->getTerminator());
1017 return false;
1018 }
1019
1020 // We must be able to predicate all blocks that need to be predicated.
1021 if (blockNeedsPredication(BB)) {
1022 if (!blockCanBePredicated(BB, SafePointers)) {
1023 reportVectorizationFailure(
1024 "Control flow cannot be substituted for a select",
1025 "control flow cannot be substituted for a select",
1026 "NoCFGForSelect", ORE, TheLoop,
1027 BB->getTerminator());
1028 return false;
1029 }
1030 } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1031 reportVectorizationFailure(
1032 "Control flow cannot be substituted for a select",
1033 "control flow cannot be substituted for a select",
1034 "NoCFGForSelect", ORE, TheLoop,
1035 BB->getTerminator());
1036 return false;
1037 }
1038 }
1039
1040 // We can if-convert this loop.
1041 return true;
1042 }
1043
1044 // Helper function to canVectorizeLoopNestCFG.
canVectorizeLoopCFG(Loop * Lp,bool UseVPlanNativePath)1045 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1046 bool UseVPlanNativePath) {
1047 assert((UseVPlanNativePath || Lp->empty()) &&
1048 "VPlan-native path is not enabled.");
1049
1050 // TODO: ORE should be improved to show more accurate information when an
1051 // outer loop can't be vectorized because a nested loop is not understood or
1052 // legal. Something like: "outer_loop_location: loop not vectorized:
1053 // (inner_loop_location) loop control flow is not understood by vectorizer".
1054
1055 // Store the result and return it at the end instead of exiting early, in case
1056 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1057 bool Result = true;
1058 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1059
1060 // We must have a loop in canonical form. Loops with indirectbr in them cannot
1061 // be canonicalized.
1062 if (!Lp->getLoopPreheader()) {
1063 reportVectorizationFailure("Loop doesn't have a legal pre-header",
1064 "loop control flow is not understood by vectorizer",
1065 "CFGNotUnderstood", ORE, TheLoop);
1066 if (DoExtraAnalysis)
1067 Result = false;
1068 else
1069 return false;
1070 }
1071
1072 // We must have a single backedge.
1073 if (Lp->getNumBackEdges() != 1) {
1074 reportVectorizationFailure("The loop must have a single backedge",
1075 "loop control flow is not understood by vectorizer",
1076 "CFGNotUnderstood", ORE, TheLoop);
1077 if (DoExtraAnalysis)
1078 Result = false;
1079 else
1080 return false;
1081 }
1082
1083 // We must have a single exiting block.
1084 if (!Lp->getExitingBlock()) {
1085 reportVectorizationFailure("The loop must have an exiting block",
1086 "loop control flow is not understood by vectorizer",
1087 "CFGNotUnderstood", ORE, TheLoop);
1088 if (DoExtraAnalysis)
1089 Result = false;
1090 else
1091 return false;
1092 }
1093
1094 // We only handle bottom-tested loops, i.e. loop in which the condition is
1095 // checked at the end of each iteration. With that we can assume that all
1096 // instructions in the loop are executed the same number of times.
1097 if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1098 reportVectorizationFailure("The exiting block is not the loop latch",
1099 "loop control flow is not understood by vectorizer",
1100 "CFGNotUnderstood", ORE, TheLoop);
1101 if (DoExtraAnalysis)
1102 Result = false;
1103 else
1104 return false;
1105 }
1106
1107 return Result;
1108 }
1109
canVectorizeLoopNestCFG(Loop * Lp,bool UseVPlanNativePath)1110 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1111 Loop *Lp, bool UseVPlanNativePath) {
1112 // Store the result and return it at the end instead of exiting early, in case
1113 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1114 bool Result = true;
1115 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1116 if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1117 if (DoExtraAnalysis)
1118 Result = false;
1119 else
1120 return false;
1121 }
1122
1123 // Recursively check whether the loop control flow of nested loops is
1124 // understood.
1125 for (Loop *SubLp : *Lp)
1126 if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1127 if (DoExtraAnalysis)
1128 Result = false;
1129 else
1130 return false;
1131 }
1132
1133 return Result;
1134 }
1135
canVectorize(bool UseVPlanNativePath)1136 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1137 // Store the result and return it at the end instead of exiting early, in case
1138 // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1139 bool Result = true;
1140
1141 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1142 // Check whether the loop-related control flow in the loop nest is expected by
1143 // vectorizer.
1144 if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1145 if (DoExtraAnalysis)
1146 Result = false;
1147 else
1148 return false;
1149 }
1150
1151 // We need to have a loop header.
1152 LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1153 << '\n');
1154
1155 // Specific checks for outer loops. We skip the remaining legal checks at this
1156 // point because they don't support outer loops.
1157 if (!TheLoop->empty()) {
1158 assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1159
1160 if (!canVectorizeOuterLoop()) {
1161 reportVectorizationFailure("Unsupported outer loop",
1162 "unsupported outer loop",
1163 "UnsupportedOuterLoop",
1164 ORE, TheLoop);
1165 // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1166 // outer loops.
1167 return false;
1168 }
1169
1170 LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1171 return Result;
1172 }
1173
1174 assert(TheLoop->empty() && "Inner loop expected.");
1175 // Check if we can if-convert non-single-bb loops.
1176 unsigned NumBlocks = TheLoop->getNumBlocks();
1177 if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1178 LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1179 if (DoExtraAnalysis)
1180 Result = false;
1181 else
1182 return false;
1183 }
1184
1185 // Check if we can vectorize the instructions and CFG in this loop.
1186 if (!canVectorizeInstrs()) {
1187 LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1188 if (DoExtraAnalysis)
1189 Result = false;
1190 else
1191 return false;
1192 }
1193
1194 // Go over each instruction and look at memory deps.
1195 if (!canVectorizeMemory()) {
1196 LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1197 if (DoExtraAnalysis)
1198 Result = false;
1199 else
1200 return false;
1201 }
1202
1203 LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1204 << (LAI->getRuntimePointerChecking()->Need
1205 ? " (with a runtime bound check)"
1206 : "")
1207 << "!\n");
1208
1209 unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1210 if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1211 SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1212
1213 if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1214 reportVectorizationFailure("Too many SCEV checks needed",
1215 "Too many SCEV assumptions need to be made and checked at runtime",
1216 "TooManySCEVRunTimeChecks", ORE, TheLoop);
1217 if (DoExtraAnalysis)
1218 Result = false;
1219 else
1220 return false;
1221 }
1222
1223 // Okay! We've done all the tests. If any have failed, return false. Otherwise
1224 // we can vectorize, and at this point we don't have any other mem analysis
1225 // which may limit our maximum vectorization factor, so just return true with
1226 // no restrictions.
1227 return Result;
1228 }
1229
prepareToFoldTailByMasking()1230 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1231
1232 LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1233
1234 SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1235
1236 for (auto &Reduction : getReductionVars())
1237 ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1238
1239 // TODO: handle non-reduction outside users when tail is folded by masking.
1240 for (auto *AE : AllowedExit) {
1241 // Check that all users of allowed exit values are inside the loop or
1242 // are the live-out of a reduction.
1243 if (ReductionLiveOuts.count(AE))
1244 continue;
1245 for (User *U : AE->users()) {
1246 Instruction *UI = cast<Instruction>(U);
1247 if (TheLoop->contains(UI))
1248 continue;
1249 reportVectorizationFailure(
1250 "Cannot fold tail by masking, loop has an outside user for",
1251 "Cannot fold tail by masking in the presence of live outs.",
1252 "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1253 return false;
1254 }
1255 }
1256
1257 // The list of pointers that we can safely read and write to remains empty.
1258 SmallPtrSet<Value *, 8> SafePointers;
1259
1260 // Check and mark all blocks for predication, including those that ordinarily
1261 // do not need predication such as the header block.
1262 for (BasicBlock *BB : TheLoop->blocks()) {
1263 if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1264 reportVectorizationFailure(
1265 "Cannot fold tail by masking as required",
1266 "control flow cannot be substituted for a select",
1267 "NoCFGForSelect", ORE, TheLoop,
1268 BB->getTerminator());
1269 return false;
1270 }
1271 }
1272
1273 LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1274 return true;
1275 }
1276
1277 } // namespace llvm
1278