11d5ae102SDimitry Andric //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
21d5ae102SDimitry Andric //
31d5ae102SDimitry Andric // The LLVM Compiler Infrastructure
41d5ae102SDimitry Andric //
51d5ae102SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
61d5ae102SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
71d5ae102SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
81d5ae102SDimitry Andric //
91d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
101d5ae102SDimitry Andric ///
111d5ae102SDimitry Andric /// \file
121d5ae102SDimitry Andric /// This file defines the implementation for the loop cache analysis.
131d5ae102SDimitry Andric /// The implementation is largely based on the following paper:
141d5ae102SDimitry Andric ///
151d5ae102SDimitry Andric /// Compiler Optimizations for Improving Data Locality
161d5ae102SDimitry Andric /// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
171d5ae102SDimitry Andric /// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
181d5ae102SDimitry Andric ///
191d5ae102SDimitry Andric /// The general approach taken to estimate the number of cache lines used by the
201d5ae102SDimitry Andric /// memory references in an inner loop is:
211d5ae102SDimitry Andric /// 1. Partition memory references that exhibit temporal or spacial reuse
221d5ae102SDimitry Andric /// into reference groups.
231d5ae102SDimitry Andric /// 2. For each loop L in the a loop nest LN:
241d5ae102SDimitry Andric /// a. Compute the cost of the reference group
251d5ae102SDimitry Andric /// b. Compute the loop cost by summing up the reference groups costs
261d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
271d5ae102SDimitry Andric
281d5ae102SDimitry Andric #include "llvm/Analysis/LoopCacheAnalysis.h"
291d5ae102SDimitry Andric #include "llvm/ADT/BreadthFirstIterator.h"
301d5ae102SDimitry Andric #include "llvm/ADT/Sequence.h"
311d5ae102SDimitry Andric #include "llvm/ADT/SmallVector.h"
32b60736ecSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
33c0981da4SDimitry Andric #include "llvm/Analysis/Delinearization.h"
34b60736ecSDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
35b60736ecSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
36cfca06d7SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
37b60736ecSDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
38706b4fc4SDimitry Andric #include "llvm/Support/CommandLine.h"
391d5ae102SDimitry Andric #include "llvm/Support/Debug.h"
401d5ae102SDimitry Andric
411d5ae102SDimitry Andric using namespace llvm;
421d5ae102SDimitry Andric
431d5ae102SDimitry Andric #define DEBUG_TYPE "loop-cache-cost"
441d5ae102SDimitry Andric
451d5ae102SDimitry Andric static cl::opt<unsigned> DefaultTripCount(
461d5ae102SDimitry Andric "default-trip-count", cl::init(100), cl::Hidden,
471d5ae102SDimitry Andric cl::desc("Use this to specify the default trip count of a loop"));
481d5ae102SDimitry Andric
491d5ae102SDimitry Andric // In this analysis two array references are considered to exhibit temporal
501d5ae102SDimitry Andric // reuse if they access either the same memory location, or a memory location
511d5ae102SDimitry Andric // with distance smaller than a configurable threshold.
521d5ae102SDimitry Andric static cl::opt<unsigned> TemporalReuseThreshold(
531d5ae102SDimitry Andric "temporal-reuse-threshold", cl::init(2), cl::Hidden,
541d5ae102SDimitry Andric cl::desc("Use this to specify the max. distance between array elements "
551d5ae102SDimitry Andric "accessed in a loop so that the elements are classified to have "
561d5ae102SDimitry Andric "temporal reuse"));
571d5ae102SDimitry Andric
581d5ae102SDimitry Andric /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
591d5ae102SDimitry Andric /// nullptr if any loops in the loop vector supplied has more than one sibling.
601d5ae102SDimitry Andric /// The loop vector is expected to contain loops collected in breadth-first
611d5ae102SDimitry Andric /// order.
getInnerMostLoop(const LoopVectorTy & Loops)621d5ae102SDimitry Andric static Loop *getInnerMostLoop(const LoopVectorTy &Loops) {
631d5ae102SDimitry Andric assert(!Loops.empty() && "Expecting a non-empy loop vector");
641d5ae102SDimitry Andric
651d5ae102SDimitry Andric Loop *LastLoop = Loops.back();
661d5ae102SDimitry Andric Loop *ParentLoop = LastLoop->getParentLoop();
671d5ae102SDimitry Andric
681d5ae102SDimitry Andric if (ParentLoop == nullptr) {
691d5ae102SDimitry Andric assert(Loops.size() == 1 && "Expecting a single loop");
701d5ae102SDimitry Andric return LastLoop;
711d5ae102SDimitry Andric }
721d5ae102SDimitry Andric
73cfca06d7SDimitry Andric return (llvm::is_sorted(Loops,
741d5ae102SDimitry Andric [](const Loop *L1, const Loop *L2) {
751d5ae102SDimitry Andric return L1->getLoopDepth() < L2->getLoopDepth();
761d5ae102SDimitry Andric }))
771d5ae102SDimitry Andric ? LastLoop
781d5ae102SDimitry Andric : nullptr;
791d5ae102SDimitry Andric }
801d5ae102SDimitry Andric
isOneDimensionalArray(const SCEV & AccessFn,const SCEV & ElemSize,const Loop & L,ScalarEvolution & SE)811d5ae102SDimitry Andric static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
821d5ae102SDimitry Andric const Loop &L, ScalarEvolution &SE) {
831d5ae102SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
841d5ae102SDimitry Andric if (!AR || !AR->isAffine())
851d5ae102SDimitry Andric return false;
861d5ae102SDimitry Andric
871d5ae102SDimitry Andric assert(AR->getLoop() && "AR should have a loop");
881d5ae102SDimitry Andric
891d5ae102SDimitry Andric // Check that start and increment are not add recurrences.
901d5ae102SDimitry Andric const SCEV *Start = AR->getStart();
911d5ae102SDimitry Andric const SCEV *Step = AR->getStepRecurrence(SE);
921d5ae102SDimitry Andric if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
931d5ae102SDimitry Andric return false;
941d5ae102SDimitry Andric
951d5ae102SDimitry Andric // Check that start and increment are both invariant in the loop.
961d5ae102SDimitry Andric if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
971d5ae102SDimitry Andric return false;
981d5ae102SDimitry Andric
99cfca06d7SDimitry Andric const SCEV *StepRec = AR->getStepRecurrence(SE);
100cfca06d7SDimitry Andric if (StepRec && SE.isKnownNegative(StepRec))
101cfca06d7SDimitry Andric StepRec = SE.getNegativeSCEV(StepRec);
102cfca06d7SDimitry Andric
103cfca06d7SDimitry Andric return StepRec == &ElemSize;
1041d5ae102SDimitry Andric }
1051d5ae102SDimitry Andric
106145449b1SDimitry Andric /// Compute the trip count for the given loop \p L or assume a default value if
107145449b1SDimitry Andric /// it is not a compile time constant. Return the SCEV expression for the trip
108145449b1SDimitry Andric /// count.
computeTripCount(const Loop & L,const SCEV & ElemSize,ScalarEvolution & SE)109145449b1SDimitry Andric static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize,
110145449b1SDimitry Andric ScalarEvolution &SE) {
1111d5ae102SDimitry Andric const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
112145449b1SDimitry Andric const SCEV *TripCount = (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
113145449b1SDimitry Andric isa<SCEVConstant>(BackedgeTakenCount))
114145449b1SDimitry Andric ? SE.getTripCountFromExitCount(BackedgeTakenCount)
115145449b1SDimitry Andric : nullptr;
116145449b1SDimitry Andric
117145449b1SDimitry Andric if (!TripCount) {
118145449b1SDimitry Andric LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
119145449b1SDimitry Andric << " could not be computed, using DefaultTripCount\n");
120145449b1SDimitry Andric TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount);
121145449b1SDimitry Andric }
122145449b1SDimitry Andric
123145449b1SDimitry Andric return TripCount;
1241d5ae102SDimitry Andric }
1251d5ae102SDimitry Andric
1261d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
1271d5ae102SDimitry Andric // IndexedReference implementation
1281d5ae102SDimitry Andric //
operator <<(raw_ostream & OS,const IndexedReference & R)1291d5ae102SDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) {
1301d5ae102SDimitry Andric if (!R.IsValid) {
1311d5ae102SDimitry Andric OS << R.StoreOrLoadInst;
1321d5ae102SDimitry Andric OS << ", IsValid=false.";
1331d5ae102SDimitry Andric return OS;
1341d5ae102SDimitry Andric }
1351d5ae102SDimitry Andric
1361d5ae102SDimitry Andric OS << *R.BasePointer;
1371d5ae102SDimitry Andric for (const SCEV *Subscript : R.Subscripts)
1381d5ae102SDimitry Andric OS << "[" << *Subscript << "]";
1391d5ae102SDimitry Andric
1401d5ae102SDimitry Andric OS << ", Sizes: ";
1411d5ae102SDimitry Andric for (const SCEV *Size : R.Sizes)
1421d5ae102SDimitry Andric OS << "[" << *Size << "]";
1431d5ae102SDimitry Andric
1441d5ae102SDimitry Andric return OS;
1451d5ae102SDimitry Andric }
1461d5ae102SDimitry Andric
IndexedReference(Instruction & StoreOrLoadInst,const LoopInfo & LI,ScalarEvolution & SE)1471d5ae102SDimitry Andric IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
1481d5ae102SDimitry Andric const LoopInfo &LI, ScalarEvolution &SE)
1491d5ae102SDimitry Andric : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
1501d5ae102SDimitry Andric assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
1511d5ae102SDimitry Andric "Expecting a load or store instruction");
1521d5ae102SDimitry Andric
1531d5ae102SDimitry Andric IsValid = delinearize(LI);
1541d5ae102SDimitry Andric if (IsValid)
1551d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
1561d5ae102SDimitry Andric << "\n");
1571d5ae102SDimitry Andric }
1581d5ae102SDimitry Andric
159e3b55780SDimitry Andric std::optional<bool>
hasSpacialReuse(const IndexedReference & Other,unsigned CLS,AAResults & AA) const160e3b55780SDimitry Andric IndexedReference::hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
161b60736ecSDimitry Andric AAResults &AA) const {
1621d5ae102SDimitry Andric assert(IsValid && "Expecting a valid reference");
1631d5ae102SDimitry Andric
1641d5ae102SDimitry Andric if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
1651d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2)
1661d5ae102SDimitry Andric << "No spacial reuse: different base pointers\n");
1671d5ae102SDimitry Andric return false;
1681d5ae102SDimitry Andric }
1691d5ae102SDimitry Andric
1701d5ae102SDimitry Andric unsigned NumSubscripts = getNumSubscripts();
1711d5ae102SDimitry Andric if (NumSubscripts != Other.getNumSubscripts()) {
1721d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2)
1731d5ae102SDimitry Andric << "No spacial reuse: different number of subscripts\n");
1741d5ae102SDimitry Andric return false;
1751d5ae102SDimitry Andric }
1761d5ae102SDimitry Andric
1771d5ae102SDimitry Andric // all subscripts must be equal, except the leftmost one (the last one).
1781d5ae102SDimitry Andric for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
1791d5ae102SDimitry Andric if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
1801d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
1811d5ae102SDimitry Andric << "\n\t" << *getSubscript(SubNum) << "\n\t"
1821d5ae102SDimitry Andric << *Other.getSubscript(SubNum) << "\n");
1831d5ae102SDimitry Andric return false;
1841d5ae102SDimitry Andric }
1851d5ae102SDimitry Andric }
1861d5ae102SDimitry Andric
1871d5ae102SDimitry Andric // the difference between the last subscripts must be less than the cache line
1881d5ae102SDimitry Andric // size.
1891d5ae102SDimitry Andric const SCEV *LastSubscript = getLastSubscript();
1901d5ae102SDimitry Andric const SCEV *OtherLastSubscript = Other.getLastSubscript();
1911d5ae102SDimitry Andric const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
1921d5ae102SDimitry Andric SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
1931d5ae102SDimitry Andric
1941d5ae102SDimitry Andric if (Diff == nullptr) {
1951d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2)
1961d5ae102SDimitry Andric << "No spacial reuse, difference between subscript:\n\t"
1971d5ae102SDimitry Andric << *LastSubscript << "\n\t" << OtherLastSubscript
1981d5ae102SDimitry Andric << "\nis not constant.\n");
199e3b55780SDimitry Andric return std::nullopt;
2001d5ae102SDimitry Andric }
2011d5ae102SDimitry Andric
2021d5ae102SDimitry Andric bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
2031d5ae102SDimitry Andric
2041d5ae102SDimitry Andric LLVM_DEBUG({
2051d5ae102SDimitry Andric if (InSameCacheLine)
2061d5ae102SDimitry Andric dbgs().indent(2) << "Found spacial reuse.\n";
2071d5ae102SDimitry Andric else
2081d5ae102SDimitry Andric dbgs().indent(2) << "No spacial reuse.\n";
2091d5ae102SDimitry Andric });
2101d5ae102SDimitry Andric
2111d5ae102SDimitry Andric return InSameCacheLine;
2121d5ae102SDimitry Andric }
2131d5ae102SDimitry Andric
214e3b55780SDimitry Andric std::optional<bool>
hasTemporalReuse(const IndexedReference & Other,unsigned MaxDistance,const Loop & L,DependenceInfo & DI,AAResults & AA) const215e3b55780SDimitry Andric IndexedReference::hasTemporalReuse(const IndexedReference &Other,
216e3b55780SDimitry Andric unsigned MaxDistance, const Loop &L,
217e3b55780SDimitry Andric DependenceInfo &DI, AAResults &AA) const {
2181d5ae102SDimitry Andric assert(IsValid && "Expecting a valid reference");
2191d5ae102SDimitry Andric
2201d5ae102SDimitry Andric if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
2211d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2)
2221d5ae102SDimitry Andric << "No temporal reuse: different base pointer\n");
2231d5ae102SDimitry Andric return false;
2241d5ae102SDimitry Andric }
2251d5ae102SDimitry Andric
2261d5ae102SDimitry Andric std::unique_ptr<Dependence> D =
2271d5ae102SDimitry Andric DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
2281d5ae102SDimitry Andric
2291d5ae102SDimitry Andric if (D == nullptr) {
2301d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
2311d5ae102SDimitry Andric return false;
2321d5ae102SDimitry Andric }
2331d5ae102SDimitry Andric
2341d5ae102SDimitry Andric if (D->isLoopIndependent()) {
2351d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
2361d5ae102SDimitry Andric return true;
2371d5ae102SDimitry Andric }
2381d5ae102SDimitry Andric
2391d5ae102SDimitry Andric // Check the dependence distance at every loop level. There is temporal reuse
2401d5ae102SDimitry Andric // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
2411d5ae102SDimitry Andric // it is zero at every other loop level.
2421d5ae102SDimitry Andric int LoopDepth = L.getLoopDepth();
2431d5ae102SDimitry Andric int Levels = D->getLevels();
2441d5ae102SDimitry Andric for (int Level = 1; Level <= Levels; ++Level) {
2451d5ae102SDimitry Andric const SCEV *Distance = D->getDistance(Level);
2461d5ae102SDimitry Andric const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
2471d5ae102SDimitry Andric
2481d5ae102SDimitry Andric if (SCEVConst == nullptr) {
2491d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
250e3b55780SDimitry Andric return std::nullopt;
2511d5ae102SDimitry Andric }
2521d5ae102SDimitry Andric
2531d5ae102SDimitry Andric const ConstantInt &CI = *SCEVConst->getValue();
2541d5ae102SDimitry Andric if (Level != LoopDepth && !CI.isZero()) {
2551d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2)
2561d5ae102SDimitry Andric << "No temporal reuse: distance is not zero at depth=" << Level
2571d5ae102SDimitry Andric << "\n");
2581d5ae102SDimitry Andric return false;
2591d5ae102SDimitry Andric } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
2601d5ae102SDimitry Andric LLVM_DEBUG(
2611d5ae102SDimitry Andric dbgs().indent(2)
2621d5ae102SDimitry Andric << "No temporal reuse: distance is greater than MaxDistance at depth="
2631d5ae102SDimitry Andric << Level << "\n");
2641d5ae102SDimitry Andric return false;
2651d5ae102SDimitry Andric }
2661d5ae102SDimitry Andric }
2671d5ae102SDimitry Andric
2681d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
2691d5ae102SDimitry Andric return true;
2701d5ae102SDimitry Andric }
2711d5ae102SDimitry Andric
computeRefCost(const Loop & L,unsigned CLS) const2721d5ae102SDimitry Andric CacheCostTy IndexedReference::computeRefCost(const Loop &L,
2731d5ae102SDimitry Andric unsigned CLS) const {
2741d5ae102SDimitry Andric assert(IsValid && "Expecting a valid reference");
2751d5ae102SDimitry Andric LLVM_DEBUG({
2761d5ae102SDimitry Andric dbgs().indent(2) << "Computing cache cost for:\n";
2771d5ae102SDimitry Andric dbgs().indent(4) << *this << "\n";
2781d5ae102SDimitry Andric });
2791d5ae102SDimitry Andric
2801d5ae102SDimitry Andric // If the indexed reference is loop invariant the cost is one.
2811d5ae102SDimitry Andric if (isLoopInvariant(L)) {
2821d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
2831d5ae102SDimitry Andric return 1;
2841d5ae102SDimitry Andric }
2851d5ae102SDimitry Andric
286145449b1SDimitry Andric const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE);
287145449b1SDimitry Andric assert(TripCount && "Expecting valid TripCount");
2881d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
2891d5ae102SDimitry Andric
290145449b1SDimitry Andric const SCEV *RefCost = nullptr;
2914b4fe385SDimitry Andric const SCEV *Stride = nullptr;
2924b4fe385SDimitry Andric if (isConsecutive(L, Stride, CLS)) {
293145449b1SDimitry Andric // If the indexed reference is 'consecutive' the cost is
294145449b1SDimitry Andric // (TripCount*Stride)/CLS.
2954b4fe385SDimitry Andric assert(Stride != nullptr &&
2964b4fe385SDimitry Andric "Stride should not be null for consecutive access!");
297706b4fc4SDimitry Andric Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
298c0981da4SDimitry Andric const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
299cfca06d7SDimitry Andric Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
3007fa27ce4SDimitry Andric TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
3011d5ae102SDimitry Andric const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
302ac9a064cSDimitry Andric // Round the fractional cost up to the nearest integer number.
303ac9a064cSDimitry Andric // The impact is the most significant when cost is calculated
304ac9a064cSDimitry Andric // to be a number less than one, because it makes more sense
305ac9a064cSDimitry Andric // to say one cache line is used rather than zero cache line
306ac9a064cSDimitry Andric // is used.
307ac9a064cSDimitry Andric RefCost = SE.getUDivCeilSCEV(Numerator, CacheLineSize);
308cfca06d7SDimitry Andric
3091d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(4)
3101d5ae102SDimitry Andric << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
3111d5ae102SDimitry Andric << *RefCost << "\n");
312145449b1SDimitry Andric } else {
313145449b1SDimitry Andric // If the indexed reference is not 'consecutive' the cost is proportional to
314145449b1SDimitry Andric // the trip count and the depth of the dimension which the subject loop
315145449b1SDimitry Andric // subscript is accessing. We try to estimate this by multiplying the cost
316145449b1SDimitry Andric // by the trip counts of loops corresponding to the inner dimensions. For
317145449b1SDimitry Andric // example, given the indexed reference 'A[i][j][k]', and assuming the
318145449b1SDimitry Andric // i-loop is in the innermost position, the cost would be equal to the
319145449b1SDimitry Andric // iterations of the i-loop multiplied by iterations of the j-loop.
320145449b1SDimitry Andric RefCost = TripCount;
321145449b1SDimitry Andric
322145449b1SDimitry Andric int Index = getSubscriptIndex(L);
323ac9a064cSDimitry Andric assert(Index >= 0 && "Could not locate a valid Index");
324145449b1SDimitry Andric
325145449b1SDimitry Andric for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) {
326145449b1SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(I));
327145449b1SDimitry Andric assert(AR && AR->getLoop() && "Expecting valid loop");
328145449b1SDimitry Andric const SCEV *TripCount =
329145449b1SDimitry Andric computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
330145449b1SDimitry Andric Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
3317fa27ce4SDimitry Andric RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType),
3327fa27ce4SDimitry Andric SE.getNoopOrZeroExtend(TripCount, WiderType));
333145449b1SDimitry Andric }
334145449b1SDimitry Andric
3351d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(4)
336145449b1SDimitry Andric << "Access is not consecutive: RefCost=" << *RefCost << "\n");
337145449b1SDimitry Andric }
338145449b1SDimitry Andric assert(RefCost && "Expecting a valid RefCost");
3391d5ae102SDimitry Andric
3401d5ae102SDimitry Andric // Attempt to fold RefCost into a constant.
3411d5ae102SDimitry Andric if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
3427fa27ce4SDimitry Andric return ConstantCost->getValue()->getZExtValue();
3431d5ae102SDimitry Andric
3441d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(4)
3451d5ae102SDimitry Andric << "RefCost is not a constant! Setting to RefCost=InvalidCost "
3461d5ae102SDimitry Andric "(invalid value).\n");
3471d5ae102SDimitry Andric
3481d5ae102SDimitry Andric return CacheCost::InvalidCost;
3491d5ae102SDimitry Andric }
3501d5ae102SDimitry Andric
tryDelinearizeFixedSize(const SCEV * AccessFn,SmallVectorImpl<const SCEV * > & Subscripts)351145449b1SDimitry Andric bool IndexedReference::tryDelinearizeFixedSize(
352145449b1SDimitry Andric const SCEV *AccessFn, SmallVectorImpl<const SCEV *> &Subscripts) {
353145449b1SDimitry Andric SmallVector<int, 4> ArraySizes;
354145449b1SDimitry Andric if (!tryDelinearizeFixedSizeImpl(&SE, &StoreOrLoadInst, AccessFn, Subscripts,
355145449b1SDimitry Andric ArraySizes))
356145449b1SDimitry Andric return false;
357145449b1SDimitry Andric
358145449b1SDimitry Andric // Populate Sizes with scev expressions to be used in calculations later.
359145449b1SDimitry Andric for (auto Idx : seq<unsigned>(1, Subscripts.size()))
360145449b1SDimitry Andric Sizes.push_back(
361145449b1SDimitry Andric SE.getConstant(Subscripts[Idx]->getType(), ArraySizes[Idx - 1]));
362145449b1SDimitry Andric
363145449b1SDimitry Andric LLVM_DEBUG({
364145449b1SDimitry Andric dbgs() << "Delinearized subscripts of fixed-size array\n"
365145449b1SDimitry Andric << "GEP:" << *getLoadStorePointerOperand(&StoreOrLoadInst)
366145449b1SDimitry Andric << "\n";
367145449b1SDimitry Andric });
368145449b1SDimitry Andric return true;
369145449b1SDimitry Andric }
370145449b1SDimitry Andric
delinearize(const LoopInfo & LI)3711d5ae102SDimitry Andric bool IndexedReference::delinearize(const LoopInfo &LI) {
3721d5ae102SDimitry Andric assert(Subscripts.empty() && "Subscripts should be empty");
3731d5ae102SDimitry Andric assert(Sizes.empty() && "Sizes should be empty");
3741d5ae102SDimitry Andric assert(!IsValid && "Should be called once from the constructor");
3751d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
3761d5ae102SDimitry Andric
3771d5ae102SDimitry Andric const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
3781d5ae102SDimitry Andric const BasicBlock *BB = StoreOrLoadInst.getParent();
3791d5ae102SDimitry Andric
380706b4fc4SDimitry Andric if (Loop *L = LI.getLoopFor(BB)) {
3811d5ae102SDimitry Andric const SCEV *AccessFn =
3821d5ae102SDimitry Andric SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
3831d5ae102SDimitry Andric
3841d5ae102SDimitry Andric BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
3851d5ae102SDimitry Andric if (BasePointer == nullptr) {
3861d5ae102SDimitry Andric LLVM_DEBUG(
3871d5ae102SDimitry Andric dbgs().indent(2)
3881d5ae102SDimitry Andric << "ERROR: failed to delinearize, can't identify base pointer\n");
3891d5ae102SDimitry Andric return false;
3901d5ae102SDimitry Andric }
3911d5ae102SDimitry Andric
392145449b1SDimitry Andric bool IsFixedSize = false;
393145449b1SDimitry Andric // Try to delinearize fixed-size arrays.
394145449b1SDimitry Andric if (tryDelinearizeFixedSize(AccessFn, Subscripts)) {
395145449b1SDimitry Andric IsFixedSize = true;
396145449b1SDimitry Andric // The last element of Sizes is the element size.
397145449b1SDimitry Andric Sizes.push_back(ElemSize);
3981d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
3991d5ae102SDimitry Andric << "', AccessFn: " << *AccessFn << "\n");
400145449b1SDimitry Andric }
4011d5ae102SDimitry Andric
402145449b1SDimitry Andric AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
403145449b1SDimitry Andric
404145449b1SDimitry Andric // Try to delinearize parametric-size arrays.
405145449b1SDimitry Andric if (!IsFixedSize) {
406145449b1SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
407145449b1SDimitry Andric << "', AccessFn: " << *AccessFn << "\n");
408c0981da4SDimitry Andric llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
4091d5ae102SDimitry Andric SE.getElementSize(&StoreOrLoadInst));
410145449b1SDimitry Andric }
4111d5ae102SDimitry Andric
4121d5ae102SDimitry Andric if (Subscripts.empty() || Sizes.empty() ||
4131d5ae102SDimitry Andric Subscripts.size() != Sizes.size()) {
4141d5ae102SDimitry Andric // Attempt to determine whether we have a single dimensional array access.
4151d5ae102SDimitry Andric // before giving up.
4161d5ae102SDimitry Andric if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
4171d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2)
4181d5ae102SDimitry Andric << "ERROR: failed to delinearize reference\n");
4191d5ae102SDimitry Andric Subscripts.clear();
4201d5ae102SDimitry Andric Sizes.clear();
421706b4fc4SDimitry Andric return false;
4221d5ae102SDimitry Andric }
4231d5ae102SDimitry Andric
424cfca06d7SDimitry Andric // The array may be accessed in reverse, for example:
425cfca06d7SDimitry Andric // for (i = N; i > 0; i--)
426cfca06d7SDimitry Andric // A[i] = 0;
427cfca06d7SDimitry Andric // In this case, reconstruct the access function using the absolute value
428cfca06d7SDimitry Andric // of the step recurrence.
429cfca06d7SDimitry Andric const SCEVAddRecExpr *AccessFnAR = dyn_cast<SCEVAddRecExpr>(AccessFn);
430cfca06d7SDimitry Andric const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr;
431cfca06d7SDimitry Andric
432cfca06d7SDimitry Andric if (StepRec && SE.isKnownNegative(StepRec))
433cfca06d7SDimitry Andric AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(),
434cfca06d7SDimitry Andric SE.getNegativeSCEV(StepRec),
435cfca06d7SDimitry Andric AccessFnAR->getLoop(),
436cfca06d7SDimitry Andric AccessFnAR->getNoWrapFlags());
4371d5ae102SDimitry Andric const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
4381d5ae102SDimitry Andric Subscripts.push_back(Div);
4391d5ae102SDimitry Andric Sizes.push_back(ElemSize);
4401d5ae102SDimitry Andric }
4411d5ae102SDimitry Andric
4421d5ae102SDimitry Andric return all_of(Subscripts, [&](const SCEV *Subscript) {
4431d5ae102SDimitry Andric return isSimpleAddRecurrence(*Subscript, *L);
4441d5ae102SDimitry Andric });
4451d5ae102SDimitry Andric }
4461d5ae102SDimitry Andric
4471d5ae102SDimitry Andric return false;
4481d5ae102SDimitry Andric }
4491d5ae102SDimitry Andric
isLoopInvariant(const Loop & L) const4501d5ae102SDimitry Andric bool IndexedReference::isLoopInvariant(const Loop &L) const {
4511d5ae102SDimitry Andric Value *Addr = getPointerOperand(&StoreOrLoadInst);
4521d5ae102SDimitry Andric assert(Addr != nullptr && "Expecting either a load or a store instruction");
4531d5ae102SDimitry Andric assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
4541d5ae102SDimitry Andric
4551d5ae102SDimitry Andric if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
4561d5ae102SDimitry Andric return true;
4571d5ae102SDimitry Andric
4581d5ae102SDimitry Andric // The indexed reference is loop invariant if none of the coefficients use
4591d5ae102SDimitry Andric // the loop induction variable.
4601d5ae102SDimitry Andric bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
4611d5ae102SDimitry Andric return isCoeffForLoopZeroOrInvariant(*Subscript, L);
4621d5ae102SDimitry Andric });
4631d5ae102SDimitry Andric
4641d5ae102SDimitry Andric return allCoeffForLoopAreZero;
4651d5ae102SDimitry Andric }
4661d5ae102SDimitry Andric
isConsecutive(const Loop & L,const SCEV * & Stride,unsigned CLS) const4674b4fe385SDimitry Andric bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
4684b4fe385SDimitry Andric unsigned CLS) const {
4691d5ae102SDimitry Andric // The indexed reference is 'consecutive' if the only coefficient that uses
4701d5ae102SDimitry Andric // the loop induction variable is the last one...
4711d5ae102SDimitry Andric const SCEV *LastSubscript = Subscripts.back();
4721d5ae102SDimitry Andric for (const SCEV *Subscript : Subscripts) {
4731d5ae102SDimitry Andric if (Subscript == LastSubscript)
4741d5ae102SDimitry Andric continue;
4751d5ae102SDimitry Andric if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
4761d5ae102SDimitry Andric return false;
4771d5ae102SDimitry Andric }
4781d5ae102SDimitry Andric
4791d5ae102SDimitry Andric // ...and the access stride is less than the cache line size.
4801d5ae102SDimitry Andric const SCEV *Coeff = getLastCoefficient();
4811d5ae102SDimitry Andric const SCEV *ElemSize = Sizes.back();
4824b4fe385SDimitry Andric Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
4834b4fe385SDimitry Andric // FIXME: This assumes that all values are signed integers which may
4844b4fe385SDimitry Andric // be incorrect in unusual codes and incorrectly use sext instead of zext.
4854b4fe385SDimitry Andric // for (uint32_t i = 0; i < 512; ++i) {
4864b4fe385SDimitry Andric // uint8_t trunc = i;
4874b4fe385SDimitry Andric // A[trunc] = 42;
4884b4fe385SDimitry Andric // }
4894b4fe385SDimitry Andric // This consecutively iterates twice over A. If `trunc` is sign-extended,
4904b4fe385SDimitry Andric // we would conclude that this may iterate backwards over the array.
4914b4fe385SDimitry Andric // However, LoopCacheAnalysis is heuristic anyway and transformations must
4924b4fe385SDimitry Andric // not result in wrong optimizations if the heuristic was incorrect.
4934b4fe385SDimitry Andric Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
4944b4fe385SDimitry Andric SE.getNoopOrSignExtend(ElemSize, WiderType));
4951d5ae102SDimitry Andric const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
4961d5ae102SDimitry Andric
497cfca06d7SDimitry Andric Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
4981d5ae102SDimitry Andric return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
4991d5ae102SDimitry Andric }
5001d5ae102SDimitry Andric
getSubscriptIndex(const Loop & L) const501145449b1SDimitry Andric int IndexedReference::getSubscriptIndex(const Loop &L) const {
502145449b1SDimitry Andric for (auto Idx : seq<int>(0, getNumSubscripts())) {
503145449b1SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(getSubscript(Idx));
504145449b1SDimitry Andric if (AR && AR->getLoop() == &L) {
505145449b1SDimitry Andric return Idx;
506145449b1SDimitry Andric }
507145449b1SDimitry Andric }
508145449b1SDimitry Andric return -1;
509145449b1SDimitry Andric }
510145449b1SDimitry Andric
getLastCoefficient() const5111d5ae102SDimitry Andric const SCEV *IndexedReference::getLastCoefficient() const {
5121d5ae102SDimitry Andric const SCEV *LastSubscript = getLastSubscript();
513c0981da4SDimitry Andric auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
5141d5ae102SDimitry Andric return AR->getStepRecurrence(SE);
5151d5ae102SDimitry Andric }
5161d5ae102SDimitry Andric
isCoeffForLoopZeroOrInvariant(const SCEV & Subscript,const Loop & L) const5171d5ae102SDimitry Andric bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
5181d5ae102SDimitry Andric const Loop &L) const {
5191d5ae102SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
5201d5ae102SDimitry Andric return (AR != nullptr) ? AR->getLoop() != &L
5211d5ae102SDimitry Andric : SE.isLoopInvariant(&Subscript, &L);
5221d5ae102SDimitry Andric }
5231d5ae102SDimitry Andric
isSimpleAddRecurrence(const SCEV & Subscript,const Loop & L) const5241d5ae102SDimitry Andric bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
5251d5ae102SDimitry Andric const Loop &L) const {
5261d5ae102SDimitry Andric if (!isa<SCEVAddRecExpr>(Subscript))
5271d5ae102SDimitry Andric return false;
5281d5ae102SDimitry Andric
5291d5ae102SDimitry Andric const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
5301d5ae102SDimitry Andric assert(AR->getLoop() && "AR should have a loop");
5311d5ae102SDimitry Andric
5321d5ae102SDimitry Andric if (!AR->isAffine())
5331d5ae102SDimitry Andric return false;
5341d5ae102SDimitry Andric
5351d5ae102SDimitry Andric const SCEV *Start = AR->getStart();
5361d5ae102SDimitry Andric const SCEV *Step = AR->getStepRecurrence(SE);
5371d5ae102SDimitry Andric
5381d5ae102SDimitry Andric if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
5391d5ae102SDimitry Andric return false;
5401d5ae102SDimitry Andric
5411d5ae102SDimitry Andric return true;
5421d5ae102SDimitry Andric }
5431d5ae102SDimitry Andric
isAliased(const IndexedReference & Other,AAResults & AA) const5441d5ae102SDimitry Andric bool IndexedReference::isAliased(const IndexedReference &Other,
545b60736ecSDimitry Andric AAResults &AA) const {
5461d5ae102SDimitry Andric const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
5471d5ae102SDimitry Andric const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
5481d5ae102SDimitry Andric return AA.isMustAlias(Loc1, Loc2);
5491d5ae102SDimitry Andric }
5501d5ae102SDimitry Andric
5511d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
5521d5ae102SDimitry Andric // CacheCost implementation
5531d5ae102SDimitry Andric //
operator <<(raw_ostream & OS,const CacheCost & CC)5541d5ae102SDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
5551d5ae102SDimitry Andric for (const auto &LC : CC.LoopCosts) {
5561d5ae102SDimitry Andric const Loop *L = LC.first;
5571d5ae102SDimitry Andric OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
5581d5ae102SDimitry Andric }
5591d5ae102SDimitry Andric return OS;
5601d5ae102SDimitry Andric }
5611d5ae102SDimitry Andric
CacheCost(const LoopVectorTy & Loops,const LoopInfo & LI,ScalarEvolution & SE,TargetTransformInfo & TTI,AAResults & AA,DependenceInfo & DI,std::optional<unsigned> TRT)5621d5ae102SDimitry Andric CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
5631d5ae102SDimitry Andric ScalarEvolution &SE, TargetTransformInfo &TTI,
564e3b55780SDimitry Andric AAResults &AA, DependenceInfo &DI,
565e3b55780SDimitry Andric std::optional<unsigned> TRT)
566e3b55780SDimitry Andric : Loops(Loops), TRT(TRT.value_or(TemporalReuseThreshold)), LI(LI), SE(SE),
567e3b55780SDimitry Andric TTI(TTI), AA(AA), DI(DI) {
5681d5ae102SDimitry Andric assert(!Loops.empty() && "Expecting a non-empty loop vector.");
5691d5ae102SDimitry Andric
5701d5ae102SDimitry Andric for (const Loop *L : Loops) {
5711d5ae102SDimitry Andric unsigned TripCount = SE.getSmallConstantTripCount(L);
5721d5ae102SDimitry Andric TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
5731d5ae102SDimitry Andric TripCounts.push_back({L, TripCount});
5741d5ae102SDimitry Andric }
5751d5ae102SDimitry Andric
5761d5ae102SDimitry Andric calculateCacheFootprint();
5771d5ae102SDimitry Andric }
5781d5ae102SDimitry Andric
5791d5ae102SDimitry Andric std::unique_ptr<CacheCost>
getCacheCost(Loop & Root,LoopStandardAnalysisResults & AR,DependenceInfo & DI,std::optional<unsigned> TRT)5801d5ae102SDimitry Andric CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
581e3b55780SDimitry Andric DependenceInfo &DI, std::optional<unsigned> TRT) {
582b60736ecSDimitry Andric if (!Root.isOutermost()) {
5831d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
5841d5ae102SDimitry Andric return nullptr;
5851d5ae102SDimitry Andric }
5861d5ae102SDimitry Andric
5871d5ae102SDimitry Andric LoopVectorTy Loops;
588b60736ecSDimitry Andric append_range(Loops, breadth_first(&Root));
5891d5ae102SDimitry Andric
5901d5ae102SDimitry Andric if (!getInnerMostLoop(Loops)) {
5911d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
5921d5ae102SDimitry Andric "than one innermost loop\n");
5931d5ae102SDimitry Andric return nullptr;
5941d5ae102SDimitry Andric }
5951d5ae102SDimitry Andric
5961d5ae102SDimitry Andric return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
5971d5ae102SDimitry Andric }
5981d5ae102SDimitry Andric
calculateCacheFootprint()5991d5ae102SDimitry Andric void CacheCost::calculateCacheFootprint() {
6001d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
6011d5ae102SDimitry Andric ReferenceGroupsTy RefGroups;
6021d5ae102SDimitry Andric if (!populateReferenceGroups(RefGroups))
6031d5ae102SDimitry Andric return;
6041d5ae102SDimitry Andric
6051d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
6061d5ae102SDimitry Andric for (const Loop *L : Loops) {
607c0981da4SDimitry Andric assert(llvm::none_of(
608c0981da4SDimitry Andric LoopCosts,
609c0981da4SDimitry Andric [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
6101d5ae102SDimitry Andric "Should not add duplicate element");
6111d5ae102SDimitry Andric CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
6121d5ae102SDimitry Andric LoopCosts.push_back(std::make_pair(L, LoopCost));
6131d5ae102SDimitry Andric }
6141d5ae102SDimitry Andric
6151d5ae102SDimitry Andric sortLoopCosts();
6161d5ae102SDimitry Andric RefGroups.clear();
6171d5ae102SDimitry Andric }
6181d5ae102SDimitry Andric
populateReferenceGroups(ReferenceGroupsTy & RefGroups) const6191d5ae102SDimitry Andric bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
6201d5ae102SDimitry Andric assert(RefGroups.empty() && "Reference groups should be empty");
6211d5ae102SDimitry Andric
6221d5ae102SDimitry Andric unsigned CLS = TTI.getCacheLineSize();
6231d5ae102SDimitry Andric Loop *InnerMostLoop = getInnerMostLoop(Loops);
6241d5ae102SDimitry Andric assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
6251d5ae102SDimitry Andric
6261d5ae102SDimitry Andric for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
6271d5ae102SDimitry Andric for (Instruction &I : *BB) {
6281d5ae102SDimitry Andric if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
6291d5ae102SDimitry Andric continue;
6301d5ae102SDimitry Andric
6311d5ae102SDimitry Andric std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
6321d5ae102SDimitry Andric if (!R->isValid())
6331d5ae102SDimitry Andric continue;
6341d5ae102SDimitry Andric
6351d5ae102SDimitry Andric bool Added = false;
6361d5ae102SDimitry Andric for (ReferenceGroupTy &RefGroup : RefGroups) {
637145449b1SDimitry Andric const IndexedReference &Representative = *RefGroup.front();
6381d5ae102SDimitry Andric LLVM_DEBUG({
6391d5ae102SDimitry Andric dbgs() << "References:\n";
6401d5ae102SDimitry Andric dbgs().indent(2) << *R << "\n";
6411d5ae102SDimitry Andric dbgs().indent(2) << Representative << "\n";
6421d5ae102SDimitry Andric });
6431d5ae102SDimitry Andric
644cfca06d7SDimitry Andric
645cfca06d7SDimitry Andric // FIXME: Both positive and negative access functions will be placed
646cfca06d7SDimitry Andric // into the same reference group, resulting in a bi-directional array
647cfca06d7SDimitry Andric // access such as:
648cfca06d7SDimitry Andric // for (i = N; i > 0; i--)
649cfca06d7SDimitry Andric // A[i] = A[N - i];
650cfca06d7SDimitry Andric // having the same cost calculation as a single dimention access pattern
651cfca06d7SDimitry Andric // for (i = 0; i < N; i++)
652cfca06d7SDimitry Andric // A[i] = A[i];
653cfca06d7SDimitry Andric // when in actuality, depending on the array size, the first example
654cfca06d7SDimitry Andric // should have a cost closer to 2x the second due to the two cache
655cfca06d7SDimitry Andric // access per iteration from opposite ends of the array
656e3b55780SDimitry Andric std::optional<bool> HasTemporalReuse =
6571d5ae102SDimitry Andric R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
658e3b55780SDimitry Andric std::optional<bool> HasSpacialReuse =
6591d5ae102SDimitry Andric R->hasSpacialReuse(Representative, CLS, AA);
6601d5ae102SDimitry Andric
661145449b1SDimitry Andric if ((HasTemporalReuse && *HasTemporalReuse) ||
662145449b1SDimitry Andric (HasSpacialReuse && *HasSpacialReuse)) {
6631d5ae102SDimitry Andric RefGroup.push_back(std::move(R));
6641d5ae102SDimitry Andric Added = true;
6651d5ae102SDimitry Andric break;
6661d5ae102SDimitry Andric }
6671d5ae102SDimitry Andric }
6681d5ae102SDimitry Andric
6691d5ae102SDimitry Andric if (!Added) {
6701d5ae102SDimitry Andric ReferenceGroupTy RG;
6711d5ae102SDimitry Andric RG.push_back(std::move(R));
6721d5ae102SDimitry Andric RefGroups.push_back(std::move(RG));
6731d5ae102SDimitry Andric }
6741d5ae102SDimitry Andric }
6751d5ae102SDimitry Andric }
6761d5ae102SDimitry Andric
6771d5ae102SDimitry Andric if (RefGroups.empty())
6781d5ae102SDimitry Andric return false;
6791d5ae102SDimitry Andric
6801d5ae102SDimitry Andric LLVM_DEBUG({
6811d5ae102SDimitry Andric dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
6821d5ae102SDimitry Andric int n = 1;
6831d5ae102SDimitry Andric for (const ReferenceGroupTy &RG : RefGroups) {
6841d5ae102SDimitry Andric dbgs().indent(2) << "RefGroup " << n << ":\n";
6851d5ae102SDimitry Andric for (const auto &IR : RG)
6861d5ae102SDimitry Andric dbgs().indent(4) << *IR << "\n";
6871d5ae102SDimitry Andric n++;
6881d5ae102SDimitry Andric }
6891d5ae102SDimitry Andric dbgs() << "\n";
6901d5ae102SDimitry Andric });
6911d5ae102SDimitry Andric
6921d5ae102SDimitry Andric return true;
6931d5ae102SDimitry Andric }
6941d5ae102SDimitry Andric
6951d5ae102SDimitry Andric CacheCostTy
computeLoopCacheCost(const Loop & L,const ReferenceGroupsTy & RefGroups) const6961d5ae102SDimitry Andric CacheCost::computeLoopCacheCost(const Loop &L,
6971d5ae102SDimitry Andric const ReferenceGroupsTy &RefGroups) const {
6981d5ae102SDimitry Andric if (!L.isLoopSimplifyForm())
6991d5ae102SDimitry Andric return InvalidCost;
7001d5ae102SDimitry Andric
7011d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
7021d5ae102SDimitry Andric << "' as innermost loop.\n");
7031d5ae102SDimitry Andric
7041d5ae102SDimitry Andric // Compute the product of the trip counts of each other loop in the nest.
7051d5ae102SDimitry Andric CacheCostTy TripCountsProduct = 1;
7061d5ae102SDimitry Andric for (const auto &TC : TripCounts) {
7071d5ae102SDimitry Andric if (TC.first == &L)
7081d5ae102SDimitry Andric continue;
7091d5ae102SDimitry Andric TripCountsProduct *= TC.second;
7101d5ae102SDimitry Andric }
7111d5ae102SDimitry Andric
7121d5ae102SDimitry Andric CacheCostTy LoopCost = 0;
7131d5ae102SDimitry Andric for (const ReferenceGroupTy &RG : RefGroups) {
7141d5ae102SDimitry Andric CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
7151d5ae102SDimitry Andric LoopCost += RefGroupCost * TripCountsProduct;
7161d5ae102SDimitry Andric }
7171d5ae102SDimitry Andric
7181d5ae102SDimitry Andric LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
7191d5ae102SDimitry Andric << "' has cost=" << LoopCost << "\n");
7201d5ae102SDimitry Andric
7211d5ae102SDimitry Andric return LoopCost;
7221d5ae102SDimitry Andric }
7231d5ae102SDimitry Andric
computeRefGroupCacheCost(const ReferenceGroupTy & RG,const Loop & L) const7241d5ae102SDimitry Andric CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
7251d5ae102SDimitry Andric const Loop &L) const {
7261d5ae102SDimitry Andric assert(!RG.empty() && "Reference group should have at least one member.");
7271d5ae102SDimitry Andric
7281d5ae102SDimitry Andric const IndexedReference *Representative = RG.front().get();
7291d5ae102SDimitry Andric return Representative->computeRefCost(L, TTI.getCacheLineSize());
7301d5ae102SDimitry Andric }
7311d5ae102SDimitry Andric
7321d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
7331d5ae102SDimitry Andric // LoopCachePrinterPass implementation
7341d5ae102SDimitry Andric //
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)7351d5ae102SDimitry Andric PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
7361d5ae102SDimitry Andric LoopStandardAnalysisResults &AR,
7371d5ae102SDimitry Andric LPMUpdater &U) {
7381d5ae102SDimitry Andric Function *F = L.getHeader()->getParent();
7391d5ae102SDimitry Andric DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
7401d5ae102SDimitry Andric
7411d5ae102SDimitry Andric if (auto CC = CacheCost::getCacheCost(L, AR, DI))
7421d5ae102SDimitry Andric OS << *CC;
7431d5ae102SDimitry Andric
7441d5ae102SDimitry Andric return PreservedAnalyses::all();
7451d5ae102SDimitry Andric }
746