1 //===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the interface for the loop cache analysis. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H 15 #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H 16 17 #include "llvm/Analysis/AliasAnalysis.h" 18 #include "llvm/Analysis/DependenceAnalysis.h" 19 #include "llvm/Analysis/LoopAnalysisManager.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/ScalarEvolution.h" 22 #include "llvm/Analysis/TargetTransformInfo.h" 23 #include "llvm/IR/Instructions.h" 24 #include "llvm/Pass.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 class LPMUpdater; 30 using CacheCostTy = int64_t; 31 using LoopVectorTy = SmallVector<Loop *, 8>; 32 33 /// Represents a memory reference as a base pointer and a set of indexing 34 /// operations. For example given the array reference A[i][2j+1][3k+2] in a 35 /// 3-dim loop nest: 36 /// for(i=0;i<n;++i) 37 /// for(j=0;j<m;++j) 38 /// for(k=0;k<o;++k) 39 /// ... A[i][2j+1][3k+2] ... 40 /// We expect: 41 /// BasePointer -> A 42 /// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>] 43 /// Sizes -> [m][o][4] 44 class IndexedReference { 45 friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); 46 47 public: 48 /// Construct an indexed reference given a \p StoreOrLoadInst instruction. 49 IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, 50 ScalarEvolution &SE); 51 52 bool isValid() const { return IsValid; } 53 const SCEV *getBasePointer() const { return BasePointer; } 54 size_t getNumSubscripts() const { return Subscripts.size(); } 55 const SCEV *getSubscript(unsigned SubNum) const { 56 assert(SubNum < getNumSubscripts() && "Invalid subscript number"); 57 return Subscripts[SubNum]; 58 } 59 const SCEV *getFirstSubscript() const { 60 assert(!Subscripts.empty() && "Expecting non-empty container"); 61 return Subscripts.front(); 62 } 63 const SCEV *getLastSubscript() const { 64 assert(!Subscripts.empty() && "Expecting non-empty container"); 65 return Subscripts.back(); 66 } 67 68 /// Return true/false if the current object and the indexed reference \p Other 69 /// are/aren't in the same cache line of size \p CLS. Two references are in 70 /// the same chace line iff the distance between them in the innermost 71 /// dimension is less than the cache line size. Return None if unsure. 72 Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS, 73 AliasAnalysis &AA) const; 74 75 /// Return true if the current object and the indexed reference \p Other 76 /// have distance smaller than \p MaxDistance in the dimension associated with 77 /// the given loop \p L. Return false if the distance is not smaller than \p 78 /// MaxDistance and None if unsure. 79 Optional<bool> hasTemporalReuse(const IndexedReference &Other, 80 unsigned MaxDistance, const Loop &L, 81 DependenceInfo &DI, AliasAnalysis &AA) const; 82 83 /// Compute the cost of the reference w.r.t. the given loop \p L when it is 84 /// considered in the innermost position in the loop nest. 85 /// The cost is defined as: 86 /// - equal to one if the reference is loop invariant, or 87 /// - equal to '(TripCount * stride) / cache_line_size' if: 88 /// + the reference stride is less than the cache line size, and 89 /// + the coefficient of this loop's index variable used in all other 90 /// subscripts is zero 91 /// - or otherwise equal to 'TripCount'. 92 CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const; 93 94 private: 95 /// Attempt to delinearize the indexed reference. 96 bool delinearize(const LoopInfo &LI); 97 98 /// Return true if the index reference is invariant with respect to loop \p L. 99 bool isLoopInvariant(const Loop &L) const; 100 101 /// Return true if the indexed reference is 'consecutive' in loop \p L. 102 /// An indexed reference is 'consecutive' if the only coefficient that uses 103 /// the loop induction variable is the rightmost one, and the access stride is 104 /// smaller than the cache line size \p CLS. 105 bool isConsecutive(const Loop &L, unsigned CLS) const; 106 107 /// Return the coefficient used in the rightmost dimension. 108 const SCEV *getLastCoefficient() const; 109 110 /// Return true if the coefficient corresponding to induction variable of 111 /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L. 112 bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, 113 const Loop &L) const; 114 115 /// Verify that the given \p Subscript is 'well formed' (must be a simple add 116 /// recurrence). 117 bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const; 118 119 /// Return true if the given reference \p Other is definetely aliased with 120 /// the indexed reference represented by this class. 121 bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const; 122 123 private: 124 /// True if the reference can be delinearized, false otherwise. 125 bool IsValid = false; 126 127 /// Represent the memory reference instruction. 128 Instruction &StoreOrLoadInst; 129 130 /// The base pointer of the memory reference. 131 const SCEV *BasePointer = nullptr; 132 133 /// The subscript (indexes) of the memory reference. 134 SmallVector<const SCEV *, 3> Subscripts; 135 136 /// The dimensions of the memory reference. 137 SmallVector<const SCEV *, 3> Sizes; 138 139 ScalarEvolution &SE; 140 }; 141 142 /// A reference group represents a set of memory references that exhibit 143 /// temporal or spacial reuse. Two references belong to the same 144 /// reference group with respect to a inner loop L iff: 145 /// 1. they have a loop independent dependency, or 146 /// 2. they have a loop carried dependence with a small dependence distance 147 /// (e.g. less than 2) carried by the inner loop, or 148 /// 3. they refer to the same array, and the subscript in their innermost 149 /// dimension is less than or equal to 'd' (where 'd' is less than the cache 150 /// line size) 151 /// 152 /// Intuitively a reference group represents memory references that access 153 /// the same cache line. Conditions 1,2 above account for temporal reuse, while 154 /// contition 3 accounts for spacial reuse. 155 using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>; 156 using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>; 157 158 /// \c CacheCost represents the estimated cost of a inner loop as the number of 159 /// cache lines used by the memory references it contains. 160 /// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of 161 /// the cache costs of all of its reference groups when the loop is considered 162 /// to be in the innermost position in the nest. 163 /// A reference group represents memory references that fall into the same cache 164 /// line. Each reference group is analysed with respect to the innermost loop in 165 /// a loop nest. The cost of a reference is defined as follow: 166 /// - one if it is loop invariant w.r.t the innermost loop, 167 /// - equal to the loop trip count divided by the cache line times the 168 /// reference stride if the reference stride is less than the cache line 169 /// size (CLS), and the coefficient of this loop's index variable used in all 170 /// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride)) 171 /// - equal to the innermost loop trip count if the reference stride is greater 172 /// or equal to the cache line size CLS. 173 class CacheCost { 174 friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); 175 using LoopTripCountTy = std::pair<const Loop *, unsigned>; 176 using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>; 177 178 public: 179 static CacheCostTy constexpr InvalidCost = -1; 180 181 /// Construct a CacheCost object for the loop nest described by \p Loops. 182 /// The optional parameter \p TRT can be used to specify the max. distance 183 /// between array elements accessed in a loop so that the elements are 184 /// classified to have temporal reuse. 185 CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, 186 TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI, 187 Optional<unsigned> TRT = None); 188 189 /// Create a CacheCost for the loop nest rooted by \p Root. 190 /// The optional parameter \p TRT can be used to specify the max. distance 191 /// between array elements accessed in a loop so that the elements are 192 /// classified to have temporal reuse. 193 static std::unique_ptr<CacheCost> 194 getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, 195 Optional<unsigned> TRT = None); 196 197 /// Return the estimated cost of loop \p L if the given loop is part of the 198 /// loop nest associated with this object. Return -1 otherwise. 199 CacheCostTy getLoopCost(const Loop &L) const { 200 auto IT = std::find_if( 201 LoopCosts.begin(), LoopCosts.end(), 202 [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; }); 203 return (IT != LoopCosts.end()) ? (*IT).second : -1; 204 } 205 206 /// Return the estimated ordered loop costs. 207 const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } 208 209 private: 210 /// Calculate the cache footprint of each loop in the nest (when it is 211 /// considered to be in the innermost position). 212 void calculateCacheFootprint(); 213 214 /// Partition store/load instructions in the loop nest into reference groups. 215 /// Two or more memory accesses belong in the same reference group if they 216 /// share the same cache line. 217 bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const; 218 219 /// Calculate the cost of the given loop \p L assuming it is the innermost 220 /// loop in nest. 221 CacheCostTy computeLoopCacheCost(const Loop &L, 222 const ReferenceGroupsTy &RefGroups) const; 223 224 /// Compute the cost of a representative reference in reference group \p RG 225 /// when the given loop \p L is considered as the innermost loop in the nest. 226 /// The computed cost is an estimate for the number of cache lines used by the 227 /// reference group. The representative reference cost is defined as: 228 /// - equal to one if the reference is loop invariant, or 229 /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's 230 /// induction variable is used only in the reference subscript associated 231 /// with loop \p L, and (b) the reference stride is less than the cache 232 /// line size, or 233 /// - TripCount otherwise 234 CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG, 235 const Loop &L) const; 236 237 /// Sort the LoopCosts vector by decreasing cache cost. 238 void sortLoopCosts() { 239 sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { 240 return A.second > B.second; 241 }); 242 } 243 244 private: 245 /// Loops in the loop nest associated with this object. 246 LoopVectorTy Loops; 247 248 /// Trip counts for the loops in the loop nest associated with this object. 249 SmallVector<LoopTripCountTy, 3> TripCounts; 250 251 /// Cache costs for the loops in the loop nest associated with this object. 252 SmallVector<LoopCacheCostTy, 3> LoopCosts; 253 254 /// The max. distance between array elements accessed in a loop so that the 255 /// elements are classified to have temporal reuse. 256 Optional<unsigned> TRT; 257 258 const LoopInfo &LI; 259 ScalarEvolution &SE; 260 TargetTransformInfo &TTI; 261 AliasAnalysis &AA; 262 DependenceInfo &DI; 263 }; 264 265 raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); 266 raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); 267 268 /// Printer pass for the \c CacheCost results. 269 class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> { 270 raw_ostream &OS; 271 272 public: 273 explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {} 274 275 PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, 276 LoopStandardAnalysisResults &AR, LPMUpdater &U); 277 }; 278 279 } // namespace llvm 280 281 #endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H 282