1 //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 ///
11 /// \file
12 /// This file defines the implementation for the loop cache analysis.
13 /// The implementation is largely based on the following paper:
14 ///
15 /// Compiler Optimizations for Improving Data Locality
16 /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17 /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18 ///
19 /// The general approach taken to estimate the number of cache lines used by the
20 /// memory references in an inner loop is:
21 /// 1. Partition memory references that exhibit temporal or spacial reuse
22 /// into reference groups.
/// 2. For each loop L in the loop nest LN:
24 /// a. Compute the cost of the reference group
25 /// b. Compute the loop cost by summing up the reference groups costs
26 //===----------------------------------------------------------------------===//
27
28 #include "llvm/Analysis/LoopCacheAnalysis.h"
29 #include "llvm/ADT/BreadthFirstIterator.h"
30 #include "llvm/ADT/Sequence.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/Delinearization.h"
34 #include "llvm/Analysis/DependenceAnalysis.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
37 #include "llvm/Analysis/TargetTransformInfo.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40
41 using namespace llvm;
42
43 #define DEBUG_TYPE "loop-cache-cost"
44
// Trip count assumed for a loop whose backedge-taken count ScalarEvolution
// cannot compute; keeps the cost model from giving up on such loops.
static cl::opt<unsigned> DefaultTripCount(
    "default-trip-count", cl::init(100), cl::Hidden,
    cl::desc("Use this to specify the default trip count of a loop"));
48
// In this analysis two array references are considered to exhibit temporal
// reuse if they access either the same memory location, or a memory location
// with distance smaller than a configurable threshold.
// This value is passed as MaxDistance to hasTemporalReuse() below.
static cl::opt<unsigned> TemporalReuseThreshold(
    "temporal-reuse-threshold", cl::init(2), cl::Hidden,
    cl::desc("Use this to specify the max. distance between array elements "
             "accessed in a loop so that the elements are classified to have "
             "temporal reuse"));
57
58 /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
59 /// nullptr if any loops in the loop vector supplied has more than one sibling.
60 /// The loop vector is expected to contain loops collected in breadth-first
61 /// order.
getInnerMostLoop(const LoopVectorTy & Loops)62 static Loop *getInnerMostLoop(const LoopVectorTy &Loops) {
63 assert(!Loops.empty() && "Expecting a non-empy loop vector");
64
65 Loop *LastLoop = Loops.back();
66 Loop *ParentLoop = LastLoop->getParentLoop();
67
68 if (ParentLoop == nullptr) {
69 assert(Loops.size() == 1 && "Expecting a single loop");
70 return LastLoop;
71 }
72
73 return (llvm::is_sorted(Loops,
74 [](const Loop *L1, const Loop *L2) {
75 return L1->getLoopDepth() < L2->getLoopDepth();
76 }))
77 ? LastLoop
78 : nullptr;
79 }
80
isOneDimensionalArray(const SCEV & AccessFn,const SCEV & ElemSize,const Loop & L,ScalarEvolution & SE)81 static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
82 const Loop &L, ScalarEvolution &SE) {
83 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
84 if (!AR || !AR->isAffine())
85 return false;
86
87 assert(AR->getLoop() && "AR should have a loop");
88
89 // Check that start and increment are not add recurrences.
90 const SCEV *Start = AR->getStart();
91 const SCEV *Step = AR->getStepRecurrence(SE);
92 if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
93 return false;
94
95 // Check that start and increment are both invariant in the loop.
96 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
97 return false;
98
99 const SCEV *StepRec = AR->getStepRecurrence(SE);
100 if (StepRec && SE.isKnownNegative(StepRec))
101 StepRec = SE.getNegativeSCEV(StepRec);
102
103 return StepRec == &ElemSize;
104 }
105
106 /// Compute the trip count for the given loop \p L. Return the SCEV expression
107 /// for the trip count or nullptr if it cannot be computed.
computeTripCount(const Loop & L,ScalarEvolution & SE)108 static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
109 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
110 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
111 !isa<SCEVConstant>(BackedgeTakenCount))
112 return nullptr;
113 return SE.getTripCountFromExitCount(BackedgeTakenCount);
114 }
115
116 //===----------------------------------------------------------------------===//
117 // IndexedReference implementation
118 //
operator <<(raw_ostream & OS,const IndexedReference & R)119 raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) {
120 if (!R.IsValid) {
121 OS << R.StoreOrLoadInst;
122 OS << ", IsValid=false.";
123 return OS;
124 }
125
126 OS << *R.BasePointer;
127 for (const SCEV *Subscript : R.Subscripts)
128 OS << "[" << *Subscript << "]";
129
130 OS << ", Sizes: ";
131 for (const SCEV *Size : R.Sizes)
132 OS << "[" << *Size << "]";
133
134 return OS;
135 }
136
IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
                                   const LoopInfo &LI, ScalarEvolution &SE)
    : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
  assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
         "Expecting a load or store instruction");

  // Attempt to recover subscripts/sizes for the access; IsValid gates every
  // later query on this reference (see the asserts in hasSpacialReuse etc.).
  IsValid = delinearize(LI);
  if (IsValid)
    LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
                                << "\n");
}
148
hasSpacialReuse(const IndexedReference & Other,unsigned CLS,AAResults & AA) const149 Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other,
150 unsigned CLS,
151 AAResults &AA) const {
152 assert(IsValid && "Expecting a valid reference");
153
154 if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
155 LLVM_DEBUG(dbgs().indent(2)
156 << "No spacial reuse: different base pointers\n");
157 return false;
158 }
159
160 unsigned NumSubscripts = getNumSubscripts();
161 if (NumSubscripts != Other.getNumSubscripts()) {
162 LLVM_DEBUG(dbgs().indent(2)
163 << "No spacial reuse: different number of subscripts\n");
164 return false;
165 }
166
167 // all subscripts must be equal, except the leftmost one (the last one).
168 for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
169 if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
170 LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
171 << "\n\t" << *getSubscript(SubNum) << "\n\t"
172 << *Other.getSubscript(SubNum) << "\n");
173 return false;
174 }
175 }
176
177 // the difference between the last subscripts must be less than the cache line
178 // size.
179 const SCEV *LastSubscript = getLastSubscript();
180 const SCEV *OtherLastSubscript = Other.getLastSubscript();
181 const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
182 SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
183
184 if (Diff == nullptr) {
185 LLVM_DEBUG(dbgs().indent(2)
186 << "No spacial reuse, difference between subscript:\n\t"
187 << *LastSubscript << "\n\t" << OtherLastSubscript
188 << "\nis not constant.\n");
189 return None;
190 }
191
192 bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
193
194 LLVM_DEBUG({
195 if (InSameCacheLine)
196 dbgs().indent(2) << "Found spacial reuse.\n";
197 else
198 dbgs().indent(2) << "No spacial reuse.\n";
199 });
200
201 return InSameCacheLine;
202 }
203
/// Return true when this reference and \p Other access the same memory
/// location within MaxDistance iterations of loop \p L, false when they
/// provably do not, and None when the dependence distance is unknown.
Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other,
                                                  unsigned MaxDistance,
                                                  const Loop &L,
                                                  DependenceInfo &DI,
                                                  AAResults &AA) const {
  assert(IsValid && "Expecting a valid reference");

  // References with provably distinct base pointers never touch the same
  // location.
  if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
    LLVM_DEBUG(dbgs().indent(2)
               << "No temporal reuse: different base pointer\n");
    return false;
  }

  // Query dependence analysis; the 'true' argument also requests
  // possibly-loop-independent dependences.
  std::unique_ptr<Dependence> D =
      DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);

  if (D == nullptr) {
    LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
    return false;
  }

  // A loop-independent dependence means both references access the same
  // location in the same iteration.
  if (D->isLoopIndependent()) {
    LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
    return true;
  }

  // Check the dependence distance at every loop level. There is temporal reuse
  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
  // it is zero at every other loop level.
  int LoopDepth = L.getLoopDepth();
  int Levels = D->getLevels();
  for (int Level = 1; Level <= Levels; ++Level) {
    const SCEV *Distance = D->getDistance(Level);
    const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);

    // An unknown distance means reuse can be neither proven nor disproven.
    if (SCEVConst == nullptr) {
      LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
      return None;
    }

    const ConstantInt &CI = *SCEVConst->getValue();
    if (Level != LoopDepth && !CI.isZero()) {
      LLVM_DEBUG(dbgs().indent(2)
                 << "No temporal reuse: distance is not zero at depth=" << Level
                 << "\n");
      return false;
    } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
      // NOTE(review): the comment above says |d| <= MaxDistance, but only the
      // signed value is compared here, so a large negative distance would be
      // accepted — confirm whether getDistance() is normalized non-negative.
      LLVM_DEBUG(
          dbgs().indent(2)
          << "No temporal reuse: distance is greater than MaxDistance at depth="
          << Level << "\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
  return true;
}
262
/// Estimate the number of cache lines this reference touches when \p L is the
/// innermost loop, given a cache line of \p CLS bytes.
CacheCostTy IndexedReference::computeRefCost(const Loop &L,
                                             unsigned CLS) const {
  assert(IsValid && "Expecting a valid reference");
  LLVM_DEBUG({
    dbgs().indent(2) << "Computing cache cost for:\n";
    dbgs().indent(4) << *this << "\n";
  });

  // If the indexed reference is loop invariant the cost is one.
  if (isLoopInvariant(L)) {
    LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
    return 1;
  }

  const SCEV *TripCount = computeTripCount(L, SE);
  if (!TripCount) {
    LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
                      << " could not be computed, using DefaultTripCount\n");
    const SCEV *ElemSize = Sizes.back();
    TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
  }
  LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");

  // If the indexed reference is 'consecutive' the cost is
  // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
  const SCEV *RefCost = TripCount;

  if (isConsecutive(L, CLS)) {
    // Stride = innermost coefficient * element size; take its magnitude so a
    // reverse traversal costs the same as a forward one, and widen all
    // operands to a common type before multiplying/dividing.
    const SCEV *Coeff = getLastCoefficient();
    const SCEV *ElemSize = Sizes.back();
    const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
    Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
    const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
    if (SE.isKnownNegative(Stride))
      Stride = SE.getNegativeSCEV(Stride);
    Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
    TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
    const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
    RefCost = SE.getUDivExpr(Numerator, CacheLineSize);

    LLVM_DEBUG(dbgs().indent(4)
               << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
               << *RefCost << "\n");
  } else
    LLVM_DEBUG(dbgs().indent(4)
               << "Access is not consecutive: RefCost=TripCount=" << *RefCost
               << "\n");

  // Attempt to fold RefCost into a constant.
  if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
    return ConstantCost->getValue()->getSExtValue();

  LLVM_DEBUG(dbgs().indent(4)
             << "RefCost is not a constant! Setting to RefCost=InvalidCost "
                "(invalid value).\n");

  return CacheCost::InvalidCost;
}
321
/// Recover the subscripts and dimension sizes of the array access performed
/// by StoreOrLoadInst. Returns true (and populates Subscripts/Sizes) on
/// success; on failure both vectors are left empty.
bool IndexedReference::delinearize(const LoopInfo &LI) {
  assert(Subscripts.empty() && "Subscripts should be empty");
  assert(Sizes.empty() && "Sizes should be empty");
  assert(!IsValid && "Should be called once from the constructor");
  LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");

  const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
  const BasicBlock *BB = StoreOrLoadInst.getParent();

  if (Loop *L = LI.getLoopFor(BB)) {
    // Evaluate the pointer operand within the enclosing loop.
    const SCEV *AccessFn =
        SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);

    BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
    if (BasePointer == nullptr) {
      LLVM_DEBUG(
          dbgs().indent(2)
          << "ERROR: failed to delinearize, can't identify base pointer\n");
      return false;
    }

    // Work with the offset from the base pointer from here on.
    AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);

    LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
                                << "', AccessFn: " << *AccessFn << "\n");

    llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
                      SE.getElementSize(&StoreOrLoadInst));

    if (Subscripts.empty() || Sizes.empty() ||
        Subscripts.size() != Sizes.size()) {
      // Attempt to determine whether we have a single dimensional array access.
      // before giving up.
      if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
        LLVM_DEBUG(dbgs().indent(2)
                   << "ERROR: failed to delinearize reference\n");
        Subscripts.clear();
        Sizes.clear();
        return false;
      }

      // The array may be accessed in reverse, for example:
      //   for (i = N; i > 0; i--)
      //     A[i] = 0;
      // In this case, reconstruct the access function using the absolute value
      // of the step recurrence.
      const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
      const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;

      if (StepRec && SE.isKnownNegative(StepRec))
        AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
                                    SE.getNegativeSCEV(StepRec),
                                    AccessFnAR->getLoop(),
                                    AccessFnAR->getNoWrapFlags());
      // Single subscript: offset divided (exactly) by the element size.
      const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
      Subscripts.push_back(Div);
      Sizes.push_back(ElemSize);
    }

    // Only accept the delinearization if every subscript is a simple affine
    // add recurrence with loop-invariant start and step.
    return all_of(Subscripts, [&](const SCEV *Subscript) {
      return isSimpleAddRecurrence(*Subscript, *L);
    });
  }

  // Instruction is not inside any loop known to LoopInfo.
  return false;
}
388
isLoopInvariant(const Loop & L) const389 bool IndexedReference::isLoopInvariant(const Loop &L) const {
390 Value *Addr = getPointerOperand(&StoreOrLoadInst);
391 assert(Addr != nullptr && "Expecting either a load or a store instruction");
392 assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
393
394 if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
395 return true;
396
397 // The indexed reference is loop invariant if none of the coefficients use
398 // the loop induction variable.
399 bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
400 return isCoeffForLoopZeroOrInvariant(*Subscript, L);
401 });
402
403 return allCoeffForLoopAreZero;
404 }
405
isConsecutive(const Loop & L,unsigned CLS) const406 bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
407 // The indexed reference is 'consecutive' if the only coefficient that uses
408 // the loop induction variable is the last one...
409 const SCEV *LastSubscript = Subscripts.back();
410 for (const SCEV *Subscript : Subscripts) {
411 if (Subscript == LastSubscript)
412 continue;
413 if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
414 return false;
415 }
416
417 // ...and the access stride is less than the cache line size.
418 const SCEV *Coeff = getLastCoefficient();
419 const SCEV *ElemSize = Sizes.back();
420 const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
421 const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
422
423 Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
424 return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
425 }
426
getLastCoefficient() const427 const SCEV *IndexedReference::getLastCoefficient() const {
428 const SCEV *LastSubscript = getLastSubscript();
429 assert(isa<SCEVAddRecExpr>(LastSubscript) &&
430 "Expecting a SCEV add recurrence expression");
431 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
432 return AR->getStepRecurrence(SE);
433 }
434
isCoeffForLoopZeroOrInvariant(const SCEV & Subscript,const Loop & L) const435 bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
436 const Loop &L) const {
437 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
438 return (AR != nullptr) ? AR->getLoop() != &L
439 : SE.isLoopInvariant(&Subscript, &L);
440 }
441
isSimpleAddRecurrence(const SCEV & Subscript,const Loop & L) const442 bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
443 const Loop &L) const {
444 if (!isa<SCEVAddRecExpr>(Subscript))
445 return false;
446
447 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
448 assert(AR->getLoop() && "AR should have a loop");
449
450 if (!AR->isAffine())
451 return false;
452
453 const SCEV *Start = AR->getStart();
454 const SCEV *Step = AR->getStepRecurrence(SE);
455
456 if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
457 return false;
458
459 return true;
460 }
461
isAliased(const IndexedReference & Other,AAResults & AA) const462 bool IndexedReference::isAliased(const IndexedReference &Other,
463 AAResults &AA) const {
464 const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
465 const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
466 return AA.isMustAlias(Loc1, Loc2);
467 }
468
469 //===----------------------------------------------------------------------===//
470 // CacheCost implementation
471 //
operator <<(raw_ostream & OS,const CacheCost & CC)472 raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
473 for (const auto &LC : CC.LoopCosts) {
474 const Loop *L = LC.first;
475 OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
476 }
477 return OS;
478 }
479
CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
                     ScalarEvolution &SE, TargetTransformInfo &TTI,
                     AAResults &AA, DependenceInfo &DI,
                     Optional<unsigned> TRT)
    : Loops(Loops), TripCounts(), LoopCosts(),
      // Fall back to the command-line temporal-reuse threshold when the
      // caller did not supply one.
      TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
      LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
  assert(!Loops.empty() && "Expecting a non-empty loop vector.");

  // Record a trip count for every loop in the nest, substituting
  // DefaultTripCount when ScalarEvolution returns 0 (unknown).
  for (const Loop *L : Loops) {
    unsigned TripCount = SE.getSmallConstantTripCount(L);
    TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
    TripCounts.push_back({L, TripCount});
  }

  calculateCacheFootprint();
}
497
498 std::unique_ptr<CacheCost>
getCacheCost(Loop & Root,LoopStandardAnalysisResults & AR,DependenceInfo & DI,Optional<unsigned> TRT)499 CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
500 DependenceInfo &DI, Optional<unsigned> TRT) {
501 if (!Root.isOutermost()) {
502 LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
503 return nullptr;
504 }
505
506 LoopVectorTy Loops;
507 append_range(Loops, breadth_first(&Root));
508
509 if (!getInnerMostLoop(Loops)) {
510 LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
511 "than one innermost loop\n");
512 return nullptr;
513 }
514
515 return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
516 }
517
calculateCacheFootprint()518 void CacheCost::calculateCacheFootprint() {
519 LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
520 ReferenceGroupsTy RefGroups;
521 if (!populateReferenceGroups(RefGroups))
522 return;
523
524 LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
525 for (const Loop *L : Loops) {
526 assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
527 [L](const LoopCacheCostTy &LCC) {
528 return LCC.first == L;
529 }) == LoopCosts.end()) &&
530 "Should not add duplicate element");
531 CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
532 LoopCosts.push_back(std::make_pair(L, LoopCost));
533 }
534
535 sortLoopCosts();
536 RefGroups.clear();
537 }
538
/// Partition the loads/stores of the innermost loop into reference groups so
/// that members of a group exhibit temporal or spacial reuse with respect to
/// each other. Returns false when no group could be formed.
bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
  assert(RefGroups.empty() && "Reference groups should be empty");

  unsigned CLS = TTI.getCacheLineSize();
  Loop *InnerMostLoop = getInnerMostLoop(Loops);
  assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");

  for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
    for (Instruction &I : *BB) {
      // Only memory references participate in the cost model.
      if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
        continue;

      // References that failed delinearization are skipped entirely.
      std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
      if (!R->isValid())
        continue;

      // Try to place R in an existing group by comparing it against each
      // group's representative (its first member).
      bool Added = false;
      for (ReferenceGroupTy &RefGroup : RefGroups) {
        const IndexedReference &Representative = *RefGroup.front().get();
        LLVM_DEBUG({
          dbgs() << "References:\n";
          dbgs().indent(2) << *R << "\n";
          dbgs().indent(2) << Representative << "\n";
        });


        // FIXME: Both positive and negative access functions will be placed
        // into the same reference group, resulting in a bi-directional array
        // access such as:
        //   for (i = N; i > 0; i--)
        //     A[i] = A[N - i];
        // having the same cost calculation as a single dimention access pattern
        //   for (i = 0; i < N; i++)
        //     A[i] = A[i];
        // when in actuality, depending on the array size, the first example
        // should have a cost closer to 2x the second due to the two cache
        // access per iteration from opposite ends of the array
        Optional<bool> HasTemporalReuse =
            R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
        Optional<bool> HasSpacialReuse =
            R->hasSpacialReuse(Representative, CLS, AA);

        // Either kind of proven reuse admits R into the group; an unknown
        // (None) answer is treated as "no reuse".
        if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
            (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
          RefGroup.push_back(std::move(R));
          Added = true;
          break;
        }
      }

      // No reuse with any existing group: R starts a group of its own.
      if (!Added) {
        ReferenceGroupTy RG;
        RG.push_back(std::move(R));
        RefGroups.push_back(std::move(RG));
      }
    }
  }

  if (RefGroups.empty())
    return false;

  LLVM_DEBUG({
    dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
    int n = 1;
    for (const ReferenceGroupTy &RG : RefGroups) {
      dbgs().indent(2) << "RefGroup " << n << ":\n";
      for (const auto &IR : RG)
        dbgs().indent(4) << *IR << "\n";
      n++;
    }
    dbgs() << "\n";
  });

  return true;
}
614
615 CacheCostTy
computeLoopCacheCost(const Loop & L,const ReferenceGroupsTy & RefGroups) const616 CacheCost::computeLoopCacheCost(const Loop &L,
617 const ReferenceGroupsTy &RefGroups) const {
618 if (!L.isLoopSimplifyForm())
619 return InvalidCost;
620
621 LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
622 << "' as innermost loop.\n");
623
624 // Compute the product of the trip counts of each other loop in the nest.
625 CacheCostTy TripCountsProduct = 1;
626 for (const auto &TC : TripCounts) {
627 if (TC.first == &L)
628 continue;
629 TripCountsProduct *= TC.second;
630 }
631
632 CacheCostTy LoopCost = 0;
633 for (const ReferenceGroupTy &RG : RefGroups) {
634 CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
635 LoopCost += RefGroupCost * TripCountsProduct;
636 }
637
638 LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
639 << "' has cost=" << LoopCost << "\n");
640
641 return LoopCost;
642 }
643
computeRefGroupCacheCost(const ReferenceGroupTy & RG,const Loop & L) const644 CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
645 const Loop &L) const {
646 assert(!RG.empty() && "Reference group should have at least one member.");
647
648 const IndexedReference *Representative = RG.front().get();
649 return Representative->computeRefCost(L, TTI.getCacheLineSize());
650 }
651
652 //===----------------------------------------------------------------------===//
653 // LoopCachePrinterPass implementation
654 //
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)655 PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
656 LoopStandardAnalysisResults &AR,
657 LPMUpdater &U) {
658 Function *F = L.getHeader()->getParent();
659 DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
660
661 if (auto CC = CacheCost::getCacheCost(L, AR, DI))
662 OS << *CC;
663
664 return PreservedAnalyses::all();
665 }
666