//===- LoopVectorize.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
// and generates target-independent LLVM-IR.
// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
// of instructions in order to estimate the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
// by the SIMD vector width, and not by one.
//
// This pass has four parts:
// 1. The main loop pass that drives the different parts.
// 2. LoopVectorizationLegality - A unit that checks for the legality
//    of the vectorization.
// 3. InnerLoopVectorizer - A unit that performs the actual
//    widening of instructions.
// 4. LoopVectorizationCostModel - A unit that checks for the profitability
//    of vectorization. It decides on the optimal vector width, which
//    can be one, if vectorization is not profitable.
//
// There is a development effort going on to migrate loop vectorizer to the
// VPlan infrastructure and to introduce outer loop vectorization support (see
// docs/Proposal/VectorizationPlan.rst and
// http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this
// purpose, we temporarily introduced the VPlan-native vectorization path: an
// alternative vectorization path that is natively implemented on top of the
// VPlan infrastructure. See EnableVPlanNativePath for enabling.
35 // 36 //===----------------------------------------------------------------------===// 37 // 38 // The reduction-variable vectorization is based on the paper: 39 // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. 40 // 41 // Variable uniformity checks are inspired by: 42 // Karrenberg, R. and Hack, S. Whole Function Vectorization. 43 // 44 // The interleaved access vectorization is based on the paper: 45 // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved 46 // Data for SIMD 47 // 48 // Other ideas/concepts are from: 49 // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. 50 // 51 // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of 52 // Vectorizing Compilers. 53 // 54 //===----------------------------------------------------------------------===// 55 56 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H 57 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H 58 59 #include "llvm/IR/PassManager.h" 60 #include "llvm/Support/CommandLine.h" 61 #include <functional> 62 63 namespace llvm { 64 65 class AssumptionCache; 66 class BlockFrequencyInfo; 67 class DemandedBits; 68 class DominatorTree; 69 class Function; 70 class Loop; 71 class LoopAccessInfoManager; 72 class LoopInfo; 73 class OptimizationRemarkEmitter; 74 class ProfileSummaryInfo; 75 class ScalarEvolution; 76 class TargetLibraryInfo; 77 class TargetTransformInfo; 78 79 extern cl::opt<bool> EnableLoopInterleaving; 80 extern cl::opt<bool> EnableLoopVectorization; 81 82 /// A marker to determine if extra passes after loop vectorization should be 83 /// run. 84 struct ShouldRunExtraVectorPasses 85 : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> { 86 static AnalysisKey Key; 87 struct Result { invalidateShouldRunExtraVectorPasses::Result88 bool invalidate(Function &F, const PreservedAnalyses &PA, 89 FunctionAnalysisManager::Invalidator &) { 90 // Check whether the analysis has been explicitly invalidated. 
Otherwise, 91 // it remains preserved. 92 auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>(); 93 return !PAC.preservedWhenStateless(); 94 } 95 }; 96 runShouldRunExtraVectorPasses97 Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); } 98 }; 99 100 /// A pass manager to run a set of extra function simplification passes after 101 /// vectorization, if requested. LoopVectorize caches the 102 /// ShouldRunExtraVectorPasses analysis to request extra simplifications, if 103 /// they could be beneficial. 104 struct ExtraVectorPassManager : public FunctionPassManager { runExtraVectorPassManager105 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { 106 auto PA = PreservedAnalyses::all(); 107 if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F)) 108 PA.intersect(FunctionPassManager::run(F, AM)); 109 PA.abandon<ShouldRunExtraVectorPasses>(); 110 return PA; 111 } 112 }; 113 114 struct LoopVectorizeOptions { 115 /// If false, consider all loops for interleaving. 116 /// If true, only loops that explicitly request interleaving are considered. 117 bool InterleaveOnlyWhenForced; 118 119 /// If false, consider all loops for vectorization. 120 /// If true, only loops that explicitly request vectorization are considered. 121 bool VectorizeOnlyWhenForced; 122 123 /// The current defaults when creating the pass with no arguments are: 124 /// EnableLoopInterleaving = true and EnableLoopVectorization = true. This 125 /// means that interleaving default is consistent with the cl::opt flag, while 126 /// vectorization is not. 127 /// FIXME: The default for EnableLoopVectorization in the cl::opt should be 128 /// set to true, and the corresponding change to account for this be made in 129 /// opt.cpp. The initializations below will become: 130 /// InterleaveOnlyWhenForced(!EnableLoopInterleaving) 131 /// VectorizeOnlyWhenForced(!EnableLoopVectorization). 
LoopVectorizeOptionsLoopVectorizeOptions132 LoopVectorizeOptions() 133 : InterleaveOnlyWhenForced(false), VectorizeOnlyWhenForced(false) {} LoopVectorizeOptionsLoopVectorizeOptions134 LoopVectorizeOptions(bool InterleaveOnlyWhenForced, 135 bool VectorizeOnlyWhenForced) 136 : InterleaveOnlyWhenForced(InterleaveOnlyWhenForced), 137 VectorizeOnlyWhenForced(VectorizeOnlyWhenForced) {} 138 setInterleaveOnlyWhenForcedLoopVectorizeOptions139 LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) { 140 InterleaveOnlyWhenForced = Value; 141 return *this; 142 } 143 setVectorizeOnlyWhenForcedLoopVectorizeOptions144 LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) { 145 VectorizeOnlyWhenForced = Value; 146 return *this; 147 } 148 }; 149 150 /// Storage for information about made changes. 151 struct LoopVectorizeResult { 152 bool MadeAnyChange; 153 bool MadeCFGChange; 154 LoopVectorizeResultLoopVectorizeResult155 LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange) 156 : MadeAnyChange(MadeAnyChange), MadeCFGChange(MadeCFGChange) {} 157 }; 158 159 /// The LoopVectorize Pass. 160 struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> { 161 private: 162 /// If false, consider all loops for interleaving. 163 /// If true, only loops that explicitly request interleaving are considered. 164 bool InterleaveOnlyWhenForced; 165 166 /// If false, consider all loops for vectorization. 167 /// If true, only loops that explicitly request vectorization are considered. 
168 bool VectorizeOnlyWhenForced; 169 170 public: 171 LoopVectorizePass(LoopVectorizeOptions Opts = {}); 172 173 ScalarEvolution *SE; 174 LoopInfo *LI; 175 TargetTransformInfo *TTI; 176 DominatorTree *DT; 177 BlockFrequencyInfo *BFI; 178 TargetLibraryInfo *TLI; 179 DemandedBits *DB; 180 AssumptionCache *AC; 181 LoopAccessInfoManager *LAIs; 182 OptimizationRemarkEmitter *ORE; 183 ProfileSummaryInfo *PSI; 184 185 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 186 void printPipeline(raw_ostream &OS, 187 function_ref<StringRef(StringRef)> MapClassName2PassName); 188 189 // Shim for old PM. 190 LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, 191 TargetTransformInfo &TTI_, DominatorTree &DT_, 192 BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, 193 DemandedBits &DB_, AssumptionCache &AC_, 194 LoopAccessInfoManager &LAIs_, 195 OptimizationRemarkEmitter &ORE_, 196 ProfileSummaryInfo *PSI_); 197 198 bool processLoop(Loop *L); 199 }; 200 201 /// Reports a vectorization failure: print \p DebugMsg for debugging 202 /// purposes along with the corresponding optimization remark \p RemarkName. 203 /// If \p I is passed, it is an instruction that prevents vectorization. 204 /// Otherwise, the loop \p TheLoop is used for the location of the remark. 205 void reportVectorizationFailure(const StringRef DebugMsg, 206 const StringRef OREMsg, const StringRef ORETag, 207 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); 208 209 /// Reports an informative message: print \p Msg for debugging purposes as well 210 /// as an optimization remark. Uses either \p I as location of the remark, or 211 /// otherwise \p TheLoop. 212 void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag, 213 OptimizationRemarkEmitter *ORE, Loop *TheLoop, 214 Instruction *I = nullptr); 215 216 } // end namespace llvm 217 218 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H 219