Lines Matching full:loop

1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
9 // This Pass handles loop interchange transform.
52 #define DEBUG_TYPE "loop-interchange"
57 "loop-interchange-threshold", cl::init(0), cl::Hidden,
62 using LoopVector = SmallVector<Loop *, 8>;
72 // Maximum loop depth supported.
86 Loop *L, DependenceInfo *DI, in populateDependencyMatrix()
163 << " dependencies inside loop\n"); in populateDependencyMatrix()
173 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
215 static void populateWorklist(Loop &L, LoopVector &LoopList) { in populateWorklist()
217 << L.getHeader()->getParent()->getName() << " Loop: %" in populateWorklist()
220 Loop *CurrentLoop = &L; in populateWorklist()
221 const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); in populateWorklist()
223 // The current loop has multiple subloops in it hence it is not tightly in populateWorklist()
240 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
243 LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, in LoopInterchangeLegality()
253 bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
255 /// Check if the loop structure is understood. We do not handle triangular
270 bool tightlyNested(Loop *Outer, Loop *Inner);
275 /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
277 bool findInductionAndReductions(Loop *L,
279 Loop *InnerLoop);
281 Loop *OuterLoop;
282 Loop *InnerLoop;
290 /// outer loop.
293 /// Set of inner loop induction PHIs
298 /// loop.
301 LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, in LoopInterchangeProfitability()
305 /// Check if the loop interchange is profitable.
306 bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
309 const DenseMap<const Loop *, unsigned> &CostMap,
315 const DenseMap<const Loop *, unsigned> &CostMap,
321 Loop *OuterLoop;
322 Loop *InnerLoop;
331 /// LoopInterchangeTransform interchanges the loop.
334 LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, in LoopInterchangeTransform()
341 void restructureLoops(Loop *NewInner, Loop *NewOuter,
344 void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
350 Loop *OuterLoop;
351 Loop *InnerLoop;
377 bool run(Loop *L) { in run()
380 SmallVector<Loop *, 8> LoopList; in run()
386 SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end()); in run()
393 bool isComputableLoopNest(ArrayRef<Loop *> LoopList) { in isComputableLoopNest()
394 for (Loop *L : LoopList) { in isComputableLoopNest()
405 LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n"); in isComputableLoopNest()
412 unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) { in selectLoopForInterchange()
413 // TODO: Add a better heuristic to select the loop to be interchanged based in selectLoopForInterchange()
414 // on the dependence matrix. Currently we select the innermost loop. in selectLoopForInterchange()
418 bool processLoopList(SmallVectorImpl<Loop *> &LoopList) { in processLoopList()
422 LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); in processLoopList()
431 LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); in processLoopList()
439 Loop *OuterMostLoop = *(LoopList.begin()); in processLoopList()
450 // Get the Outermost loop exit. in processLoopList()
458 // Obtain the loop vector returned from loop cache analysis beforehand, in processLoopList()
459 // and put each <Loop, index> pair into a map for constant time query in processLoopList()
460 // later. Indices in loop vector reprsent the optimal order of the in processLoopList()
461 // corresponding loop, e.g., given a loopnest with depth N, index 0 in processLoopList()
462 // indicates the loop should be placed as the outermost loop and index N in processLoopList()
463 // indicates the loop should be placed as the innermost loop. in processLoopList()
466 DenseMap<const Loop *, unsigned> CostMap; in processLoopList()
475 // the innermost loop, move it outwards to the best possible position in processLoopList()
503 bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, in processLoop()
506 const DenseMap<const Loop *, unsigned> &CostMap) { in processLoop()
526 << "Loop interchanged with enclosing loop."; in processLoop()
547 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { in tightlyNested()
554 // A perfectly nested loop will not have any branch in between the outer and in tightlyNested()
567 LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); in tightlyNested()
569 // and outer loop latch doesn't contain any unsafe instructions. in tightlyNested()
574 // Also make sure the inner loop preheader does not contain any unsafe in tightlyNested()
576 // the outer loop header when interchanging. in tightlyNested()
582 // Ensure the inner loop exit block flows to the outer loop latch possibly in tightlyNested()
587 LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit in tightlyNested()
588 << " does not lead to the outer loop latch.\n";); in tightlyNested()
591 // The inner loop exit block does flow to the outer loop latch and not some in tightlyNested()
593 // moved into the (new) inner loop after interchange. in tightlyNested()
598 // We have a perfect loop nest. in tightlyNested()
641 // LHS and RHS of the inner loop exit condition, e.g., in isLoopStructureUnderstood()
646 // Check if V only involves inner loop induction variable. in isLoopStructureUnderstood()
667 // In case of multiple inner loop indvars, it is okay if LHS and RHS in isLoopStructureUnderstood()
673 // related variable (Left) with a outer loop invariant (Right). in isLoopStructureUnderstood()
706 static PHINode *findInnerReductionPhi(Loop *L, Value *V) { in findInnerReductionPhi()
730 Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) { in findInductionAndReductions()
738 // PHIs in inner loops need to be part of a reduction in the outer loop, in findInductionAndReductions()
739 // discovered when checking the PHIs of the outer loop earlier. in findInductionAndReductions()
742 LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions " in findInductionAndReductions()
743 "across the outer loop.\n"); in findInductionAndReductions()
748 "Phis in loop header should have exactly 2 incoming values"); in findInductionAndReductions()
749 // Check if we have a PHI node in the outer loop that has a reduction in findInductionAndReductions()
750 // result from the inner loop as an incoming value. in findInductionAndReductions()
773 // transform currently expects the loop latches to also be the exiting in currentLimitations()
808 // For multi-level loop nests, make sure that all phi nodes for inner loops in currentLimitations()
811 Loop *CurLevelLoop = OuterLoop; in currentLimitations()
813 // We already made sure that the loop nest is tightly nested. in currentLimitations()
832 LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n"); in currentLimitations()
837 << "Inner loop structure not understood currently."; in currentLimitations()
846 Loop *L, SmallVectorImpl<PHINode *> &Inductions) { in findInductions()
855 // We currently only support LCSSA PHI nodes in the inner loop exit, if their
856 // users are either reduction PHIs or PHIs outside the outer loop (which means
857 // the we are only interested in the final value after the loop).
859 areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, in areInnerLoopExitPHIsSupported()
863 // Reduction lcssa phi will have only 1 incoming block that from loop latch. in areInnerLoopExitPHIsSupported()
877 // We currently support LCSSA PHI nodes in the outer loop exit, if their
878 // incoming values do not come from the outer loop latch or if the
879 // outer loop latch has a single predecessor. In that case, the value will
880 // be available if both the inner and outer loop conditions are true, which
882 // that may not be the case, e.g. because the outer loop latch may be executed
883 // if the inner loop is not executed.
884 static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { in areOuterLoopExitPHIsSupported()
892 // The incoming value is defined in the outer loop latch. Currently we in areOuterLoopExitPHIsSupported()
893 // only support that in case the outer loop latch has a single predecessor. in areOuterLoopExitPHIsSupported()
894 // This guarantees that the outer loop latch is executed if and only if in areOuterLoopExitPHIsSupported()
895 // the inner loop is executed (because tightlyNested() guarantees that the in areOuterLoopExitPHIsSupported()
896 // outer loop header only branches to the inner loop or the outer loop in areOuterLoopExitPHIsSupported()
899 // if the values are produced outside the loop latch. We would need in areOuterLoopExitPHIsSupported()
916 // multi-level loop nests.
917 static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { in areInnerLoopLatchPHIsSupported()
921 // further inside the looploop, e.g., in the innermost loop, will be available in areInnerLoopLatchPHIsSupported()
958 // Check if outer and inner loop contain legal instructions only. in canInterchangeLoops()
979 LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n"); in canInterchangeLoops()
984 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"); in canInterchangeLoops()
990 "in inner loop latch."; in canInterchangeLoops()
1017 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"); in canInterchangeLoops()
1022 << "Found unsupported PHI node in loop exit."; in canInterchangeLoops()
1028 LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"); in canInterchangeLoops()
1033 << "Found unsupported PHI node in loop exit."; in canInterchangeLoops()
1097 const DenseMap<const Loop *, unsigned> &CostMap, in isProfitablePerLoopCacheAnalysis() argument
1099 // This is the new cost model returned from loop cache analysis. in isProfitablePerLoopCacheAnalysis()
1100 // A smaller index means the loop should be placed an outer loop, and vice in isProfitablePerLoopCacheAnalysis()
1111 "numbers to each loop"); in isProfitablePerLoopCacheAnalysis()
1135 // If the inner loop is loop independent or doesn't carry any dependency in isProfitableForVectorization()
1137 // likely able to do inner loop vectorization already. in isProfitableForVectorization()
1141 // If the outer loop is not loop independent it is not profitable to move in isProfitableForVectorization()
1142 // this to inner position, since doing so would not enable inner loop in isProfitableForVectorization()
1147 // If inner loop has dependence and outer loop is loop independent then it in isProfitableForVectorization()
1148 // is/ profitable to interchange to enable inner loop parallelism. in isProfitableForVectorization()
1154 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, in isProfitable()
1156 const DenseMap<const Loop *, unsigned> &CostMap, in isProfitable() argument
1158 // isProfitable() is structured to avoid endless loop interchange. in isProfitable()
1159 // If loop cache analysis could decide the profitability then, in isProfitable()
1179 << "Insufficient information to calculate the cost of loop for " in isProfitable()
1196 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, in removeChildLoop()
1197 Loop *InnerLoop) { in removeChildLoop()
1198 for (Loop *L : *OuterLoop) in removeChildLoop()
1203 llvm_unreachable("Couldn't find loop"); in removeChildLoop()
1207 /// new inner and outer loop after interchanging: NewInner is the original
1208 /// outer loop and NewOuter is the original inner loop.
1230 Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, in restructureLoops()
1232 Loop *OuterLoopParent = OuterLoop->getParentLoop(); in restructureLoops()
1233 // The original inner loop preheader moves from the new inner loop to in restructureLoops()
1234 // the parent loop, if there is one. in restructureLoops()
1238 // Switch the loop levels. in restructureLoops()
1240 // Remove the loop from its parent loop. in restructureLoops()
1252 // BBs from the original inner loop. in restructureLoops()
1255 // Add BBs from the original outer loop to the original inner loop (excluding in restructureLoops()
1256 // BBs already in inner loop) in restructureLoops()
1261 // Now remove inner loop header and latch from the new inner loop and move in restructureLoops()
1262 // other BBs (the loop body) to the new inner loop. in restructureLoops()
1269 // Remove the new outer loop header and latch from the new inner loop. in restructureLoops()
1276 // The preheader of the original outer loop becomes part of the new in restructureLoops()
1277 // outer loop. in restructureLoops()
1290 LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); in transform()
1293 LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); in transform()
1307 // Create a new latch block for the inner loop. We split at the in transform()
1309 // operands that are not either loop-invariant or the induction PHI into the in transform()
1325 "the loop nest!"); in transform()
1334 // outside the inner loop or are the induction PHI. in transform()
1358 // Ensure the inner loop phi nodes have a separate basic block. in transform()
1365 // Instructions in the original inner loop preheader may depend on values in transform()
1366 // defined in the outer loop header. Move them there, because the original in transform()
1367 // inner loop preheader will become the entry into the interchanged loop nest. in transform()
1369 // instructions outside the loop nest. in transform()
1447 Loop *InnerLoop, LoopInfo *LI) { in moveLCSSAPhis()
1449 // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are in moveLCSSAPhis()
1451 // latch of the new outer loop, and the only possible users can PHI nodes in moveLCSSAPhis()
1452 // in the exit block of the loop nest or the outer loop header (reduction in moveLCSSAPhis()
1453 // PHIs, in that case, the incoming value must be defined in the inner loop in moveLCSSAPhis()
1463 // value defined from the innermost loop. in moveLCSSAPhis()
1465 // Skip phis with incoming values from the inner loop body, excluding the in moveLCSSAPhis()
1477 "Can only replace phis iff the uses are in the loop nest exit or " in moveLCSSAPhis()
1479 "dominate all loop blocks after interchanging)"); in moveLCSSAPhis()
1492 // Lcssa PHIs for values used outside the inner loop are in InnerExit. in moveLCSSAPhis()
1494 // interchanged loop and we have to preserve it. We move these to in moveLCSSAPhis()
1496 // loop after interchanging. in moveLCSSAPhis()
1500 // If the inner loop latch contains LCSSA PHIs, those come from a child loop in moveLCSSAPhis()
1505 // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have in moveLCSSAPhis()
1506 // incoming values defined in the outer loop, we have to add a new PHI in moveLCSSAPhis()
1507 // in the inner loop latch, which became the exit block of the outer loop, in moveLCSSAPhis()
1513 // Skip Phis with incoming values defined in the inner loop. Those should in moveLCSSAPhis()
1549 InnerLoopPreHeader && "Guaranteed by loop-simplify form"); in adjustLoopBranches()
1562 // Adjust the loop preheader in adjustLoopBranches()
1598 // Adjust Loop Preheader and headers. in adjustLoopBranches()
1599 // The branches in the outer loop predecessor and the outer loop header can in adjustLoopBranches()
1604 // The outer loop header might or might not branch to the outer latch. in adjustLoopBranches()
1605 // We are guaranteed to branch to the inner loop preheader. in adjustLoopBranches()
1623 // -------------Adjust loop latches----------- in adjustLoopBranches()
1649 // For PHIs in the exit block of the outer loop, outer's latch has been in adjustLoopBranches()
1654 // Now update the reduction PHIs in the inner and outer loop headers. in adjustLoopBranches()
1664 // Now move the remaining reduction PHIs from outer to inner loop header and in adjustLoopBranches()
1666 // outer loop and all the remains to do is and updating the incoming blocks. in adjustLoopBranches()
1668 LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();); in adjustLoopBranches()
1673 LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();); in adjustLoopBranches()
1684 // Values defined in the outer loop header could be used in the inner loop in adjustLoopBranches()
1686 // interchanging they will be defined in the new inner loop and used in the in adjustLoopBranches()
1687 // new outer loop. in adjustLoopBranches()
1698 // Adjust all branches in the inner and outer loop. in adjustLoopLinks()
1703 // preheader was previously executed inside the outer loop. in adjustLoopLinks()