1 //===- LoopVectorize.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
10 // and generates target-independent LLVM-IR.
11 // The vectorizer uses the TargetTransformInfo analysis to estimate the costs
12 // of instructions in order to estimate the profitability of vectorization.
13 //
14 // The loop vectorizer combines consecutive loop iterations into a single
15 // 'wide' iteration. After this transformation the index is incremented
16 // by the SIMD vector width, and not by one.
17 //
// This pass has four parts:
19 // 1. The main loop pass that drives the different parts.
20 // 2. LoopVectorizationLegality - A unit that checks for the legality
21 //    of the vectorization.
22 // 3. InnerLoopVectorizer - A unit that performs the actual
23 //    widening of instructions.
24 // 4. LoopVectorizationCostModel - A unit that checks for the profitability
25 //    of vectorization. It decides on the optimal vector width, which
26 //    can be one, if vectorization is not profitable.
27 //
28 // There is a development effort going on to migrate loop vectorizer to the
29 // VPlan infrastructure and to introduce outer loop vectorization support (see
30 // docs/Proposal/VectorizationPlan.rst and
31 // http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this
32 // purpose, we temporarily introduced the VPlan-native vectorization path: an
33 // alternative vectorization path that is natively implemented on top of the
34 // VPlan infrastructure. See EnableVPlanNativePath for enabling.
35 //
36 //===----------------------------------------------------------------------===//
37 //
38 // The reduction-variable vectorization is based on the paper:
39 //  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
40 //
41 // Variable uniformity checks are inspired by:
42 //  Karrenberg, R. and Hack, S. Whole Function Vectorization.
43 //
44 // The interleaved access vectorization is based on the paper:
45 //  Dorit Nuzman, Ira Rosen and Ayal Zaks.  Auto-Vectorization of Interleaved
46 //  Data for SIMD
47 //
48 // Other ideas/concepts are from:
49 //  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
50 //
51 //  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua.  An Evaluation of
52 //  Vectorizing Compilers.
53 //
54 //===----------------------------------------------------------------------===//
55 
56 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
57 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
58 
59 #include "llvm/IR/PassManager.h"
60 #include "llvm/Support/CommandLine.h"
61 #include <functional>
62 
63 namespace llvm {
64 
// Forward declarations of the types this header only names through pointers,
// references or function parameters. Instruction and raw_ostream are declared
// here as well, so the declarations below that mention them do not rely on
// another header happening to provide those names.
class AAResults;
class AssumptionCache;
class BlockFrequencyInfo;
class DemandedBits;
class DominatorTree;
class Function;
class Instruction;
class Loop;
class LoopAccessInfo;
class LoopInfo;
class OptimizationRemarkEmitter;
class ProfileSummaryInfo;
class ScalarEvolution;
class TargetLibraryInfo;
class TargetTransformInfo;
class raw_ostream;
79 
80 extern cl::opt<bool> EnableLoopInterleaving;
81 extern cl::opt<bool> EnableLoopVectorization;
82 
83 /// A marker to determine if extra passes after loop vectorization should be
84 /// run.
85 struct ShouldRunExtraVectorPasses
86     : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
87   static AnalysisKey Key;
88   struct Result {
89     bool invalidate(Function &F, const PreservedAnalyses &PA,
90                     FunctionAnalysisManager::Invalidator &) {
91       // Check whether the analysis has been explicitly invalidated. Otherwise,
92       // it remains preserved.
93       auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>();
94       return !PAC.preservedWhenStateless();
95     }
96   };
97 
98   Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
99 };
100 
101 /// A pass manager to run a set of extra function simplification passes after
102 /// vectorization, if requested. LoopVectorize caches the
103 /// ShouldRunExtraVectorPasses analysis to request extra simplifications, if
104 /// they could be beneficial.
105 struct ExtraVectorPassManager : public FunctionPassManager {
106   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
107     auto PA = PreservedAnalyses::all();
108     if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F))
109       PA.intersect(FunctionPassManager::run(F, AM));
110     PA.abandon<ShouldRunExtraVectorPasses>();
111     return PA;
112   }
113 };
114 
/// Options selecting which loops are considered for interleaving and for
/// vectorization.
struct LoopVectorizeOptions {
  /// When true, only loops that explicitly request interleaving are
  /// considered. When false, all loops are considered for interleaving.
  bool InterleaveOnlyWhenForced;

  /// When true, only loops that explicitly request vectorization are
  /// considered. When false, all loops are considered for vectorization.
  bool VectorizeOnlyWhenForced;

  /// Creating the options with no arguments leaves both "only when forced"
  /// restrictions off, which corresponds to EnableLoopInterleaving = true and
  /// EnableLoopVectorization = true. The interleaving default is therefore
  /// consistent with the cl::opt flag, while the vectorization default is not.
  /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
  /// set to true, and the corresponding change to account for this be made in
  /// opt.cpp. The initializations below will become:
  /// InterleaveOnlyWhenForced(!EnableLoopInterleaving)
  /// VectorizeOnlyWhenForced(!EnableLoopVectorization).
  LoopVectorizeOptions() : LoopVectorizeOptions(false, false) {}

  /// Creates the options with both restrictions given explicitly.
  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
                       bool VectorizeOnlyWhenForced)
      : InterleaveOnlyWhenForced(InterleaveOnlyWhenForced),
        VectorizeOnlyWhenForced(VectorizeOnlyWhenForced) {}

  /// Sets InterleaveOnlyWhenForced to \p Value and returns this object so
  /// that setter calls can be chained.
  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
    this->InterleaveOnlyWhenForced = Value;
    return *this;
  }

  /// Sets VectorizeOnlyWhenForced to \p Value and returns this object so
  /// that setter calls can be chained.
  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
    this->VectorizeOnlyWhenForced = Value;
    return *this;
  }
};
150 
/// Records which kinds of changes were made; this is the return type of
/// LoopVectorizePass::runImpl.
struct LoopVectorizeResult {
  /// Set when any change was made.
  bool MadeAnyChange;
  /// Set when a CFG change was made.
  bool MadeCFGChange;

  /// Stores both flags exactly as given.
  LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
      : MadeAnyChange{MadeAnyChange}, MadeCFGChange{MadeCFGChange} {}
};
159 
160 /// The LoopVectorize Pass.
161 struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
162 private:
163   /// If false, consider all loops for interleaving.
164   /// If true, only loops that explicitly request interleaving are considered.
165   bool InterleaveOnlyWhenForced;
166 
167   /// If false, consider all loops for vectorization.
168   /// If true, only loops that explicitly request vectorization are considered.
169   bool VectorizeOnlyWhenForced;
170 
171 public:
172   LoopVectorizePass(LoopVectorizeOptions Opts = {});
173 
174   ScalarEvolution *SE;
175   LoopInfo *LI;
176   TargetTransformInfo *TTI;
177   DominatorTree *DT;
178   BlockFrequencyInfo *BFI;
179   TargetLibraryInfo *TLI;
180   DemandedBits *DB;
181   AAResults *AA;
182   AssumptionCache *AC;
183   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
184   OptimizationRemarkEmitter *ORE;
185   ProfileSummaryInfo *PSI;
186 
187   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
188   void printPipeline(raw_ostream &OS,
189                      function_ref<StringRef(StringRef)> MapClassName2PassName);
190 
191   // Shim for old PM.
192   LoopVectorizeResult
193   runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_,
194           TargetTransformInfo &TTI_, DominatorTree &DT_,
195           BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_,
196           AAResults &AA_, AssumptionCache &AC_,
197           std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
198           OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
199 
200   bool processLoop(Loop *L);
201 };
202 
203 /// Reports a vectorization failure: print \p DebugMsg for debugging
204 /// purposes along with the corresponding optimization remark \p RemarkName.
205 /// If \p I is passed, it is an instruction that prevents vectorization.
206 /// Otherwise, the loop \p TheLoop is used for the location of the remark.
207 void reportVectorizationFailure(const StringRef DebugMsg,
208     const StringRef OREMsg, const StringRef ORETag,
209     OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
210 
211 /// Reports an informative message: print \p Msg for debugging purposes as well
212 /// as an optimization remark. Uses either \p I as location of the remark, or
213 /// otherwise \p TheLoop.
214 void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag,
215                              OptimizationRemarkEmitter *ORE, Loop *TheLoop,
216                              Instruction *I = nullptr);
217 
218 } // end namespace llvm
219 
220 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
221