1eb11fae6SDimitry Andric ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2a303c417SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a303c417SDimitry Andric //
7a303c417SDimitry Andric //===----------------------------------------------------------------------===//
8a303c417SDimitry Andric
97ab83427SDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
107af96fb3SDimitry Andric #include "llvm/ADT/DenseMap.h"
117ab83427SDimitry Andric #include "llvm/ADT/STLExtras.h"
127af96fb3SDimitry Andric #include "llvm/ADT/Sequence.h"
137af96fb3SDimitry Andric #include "llvm/ADT/SetVector.h"
14a303c417SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
157af96fb3SDimitry Andric #include "llvm/ADT/SmallVector.h"
16a303c417SDimitry Andric #include "llvm/ADT/Statistic.h"
177af96fb3SDimitry Andric #include "llvm/ADT/Twine.h"
18a303c417SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
19e3b55780SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
20eb11fae6SDimitry Andric #include "llvm/Analysis/CFG.h"
21044eb2f6SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
227fa27ce4SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
23d8e91e46SDimitry Andric #include "llvm/Analysis/GuardUtils.h"
247af96fb3SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
25a303c417SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
26eb11fae6SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
27d8e91e46SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
28d8e91e46SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
29b60736ecSDimitry Andric #include "llvm/Analysis/MustExecute.h"
30e3b55780SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
31b60736ecSDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
32145449b1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
33c0981da4SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
347af96fb3SDimitry Andric #include "llvm/IR/BasicBlock.h"
357af96fb3SDimitry Andric #include "llvm/IR/Constant.h"
36a303c417SDimitry Andric #include "llvm/IR/Constants.h"
37a303c417SDimitry Andric #include "llvm/IR/Dominators.h"
38a303c417SDimitry Andric #include "llvm/IR/Function.h"
39b60736ecSDimitry Andric #include "llvm/IR/IRBuilder.h"
407af96fb3SDimitry Andric #include "llvm/IR/InstrTypes.h"
417af96fb3SDimitry Andric #include "llvm/IR/Instruction.h"
42a303c417SDimitry Andric #include "llvm/IR/Instructions.h"
43044eb2f6SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
44ac9a064cSDimitry Andric #include "llvm/IR/Module.h"
45344a3780SDimitry Andric #include "llvm/IR/PatternMatch.h"
467fa27ce4SDimitry Andric #include "llvm/IR/ProfDataUtils.h"
477af96fb3SDimitry Andric #include "llvm/IR/Use.h"
487af96fb3SDimitry Andric #include "llvm/IR/Value.h"
497af96fb3SDimitry Andric #include "llvm/Support/Casting.h"
50706b4fc4SDimitry Andric #include "llvm/Support/CommandLine.h"
51a303c417SDimitry Andric #include "llvm/Support/Debug.h"
527af96fb3SDimitry Andric #include "llvm/Support/ErrorHandling.h"
537af96fb3SDimitry Andric #include "llvm/Support/GenericDomTree.h"
54145449b1SDimitry Andric #include "llvm/Support/InstructionCost.h"
55a303c417SDimitry Andric #include "llvm/Support/raw_ostream.h"
56145449b1SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h"
57a303c417SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
58044eb2f6SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
59b60736ecSDimitry Andric #include "llvm/Transforms/Utils/Local.h"
60a303c417SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
61044eb2f6SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
627af96fb3SDimitry Andric #include <algorithm>
637af96fb3SDimitry Andric #include <cassert>
647af96fb3SDimitry Andric #include <iterator>
65044eb2f6SDimitry Andric #include <numeric>
66e3b55780SDimitry Andric #include <optional>
677af96fb3SDimitry Andric #include <utility>
68a303c417SDimitry Andric
69a303c417SDimitry Andric #define DEBUG_TYPE "simple-loop-unswitch"
70a303c417SDimitry Andric
71a303c417SDimitry Andric using namespace llvm;
72344a3780SDimitry Andric using namespace llvm::PatternMatch;
73a303c417SDimitry Andric
74a303c417SDimitry Andric STATISTIC(NumBranches, "Number of branches unswitched");
75a303c417SDimitry Andric STATISTIC(NumSwitches, "Number of switches unswitched");
767fa27ce4SDimitry Andric STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
77d8e91e46SDimitry Andric STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
78a303c417SDimitry Andric STATISTIC(NumTrivial, "Number of unswitches that are trivial");
79d8e91e46SDimitry Andric STATISTIC(
80d8e91e46SDimitry Andric NumCostMultiplierSkipped,
81d8e91e46SDimitry Andric "Number of unswitch candidates that had their cost multiplier skipped");
827fa27ce4SDimitry Andric STATISTIC(NumInvariantConditionsInjected,
837fa27ce4SDimitry Andric "Number of invariant conditions injected and unswitched");
84a303c417SDimitry Andric
85044eb2f6SDimitry Andric static cl::opt<bool> EnableNonTrivialUnswitch(
86044eb2f6SDimitry Andric "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
87044eb2f6SDimitry Andric cl::desc("Forcibly enables non-trivial loop unswitching rather than "
88044eb2f6SDimitry Andric "following the configuration passed into the pass."));
89044eb2f6SDimitry Andric
90044eb2f6SDimitry Andric static cl::opt<int>
91044eb2f6SDimitry Andric UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
92044eb2f6SDimitry Andric cl::desc("The cost threshold for unswitching a loop."));
93044eb2f6SDimitry Andric
94d8e91e46SDimitry Andric static cl::opt<bool> EnableUnswitchCostMultiplier(
95d8e91e46SDimitry Andric "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
96d8e91e46SDimitry Andric cl::desc("Enable unswitch cost multiplier that prohibits exponential "
97d8e91e46SDimitry Andric "explosion in nontrivial unswitch."));
98d8e91e46SDimitry Andric static cl::opt<int> UnswitchSiblingsToplevelDiv(
99d8e91e46SDimitry Andric "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
100d8e91e46SDimitry Andric cl::desc("Toplevel siblings divisor for cost multiplier."));
101d8e91e46SDimitry Andric static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
102d8e91e46SDimitry Andric "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
103d8e91e46SDimitry Andric cl::desc("Number of unswitch candidates that are ignored when calculating "
104d8e91e46SDimitry Andric "cost multiplier."));
105d8e91e46SDimitry Andric static cl::opt<bool> UnswitchGuards(
106d8e91e46SDimitry Andric "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
107d8e91e46SDimitry Andric cl::desc("If enabled, simple loop unswitching will also consider "
108d8e91e46SDimitry Andric "llvm.experimental.guard intrinsics as unswitch candidates."));
109b60736ecSDimitry Andric static cl::opt<bool> DropNonTrivialImplicitNullChecks(
110b60736ecSDimitry Andric "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
111b60736ecSDimitry Andric cl::init(false), cl::Hidden,
112b60736ecSDimitry Andric cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
113b60736ecSDimitry Andric "null checks to save time analyzing if we can keep it."));
114344a3780SDimitry Andric static cl::opt<unsigned>
115344a3780SDimitry Andric MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
116344a3780SDimitry Andric cl::desc("Max number of memory uses to explore during "
117344a3780SDimitry Andric "partial unswitching analysis"),
118344a3780SDimitry Andric cl::init(100), cl::Hidden);
119c0981da4SDimitry Andric static cl::opt<bool> FreezeLoopUnswitchCond(
120145449b1SDimitry Andric "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden,
121c0981da4SDimitry Andric cl::desc("If enabled, the freeze instruction will be added to condition "
122c0981da4SDimitry Andric "of loop unswitch to prevent miscompilation."));
123d8e91e46SDimitry Andric
1247fa27ce4SDimitry Andric static cl::opt<bool> InjectInvariantConditions(
1257fa27ce4SDimitry Andric "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
1267fa27ce4SDimitry Andric cl::desc("Whether we should inject new invariants and unswitch them to "
1277fa27ce4SDimitry Andric "eliminate some existing (non-invariant) conditions."),
1287fa27ce4SDimitry Andric cl::init(true));
1297fa27ce4SDimitry Andric
1307fa27ce4SDimitry Andric static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
1317fa27ce4SDimitry Andric "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
1327fa27ce4SDimitry Andric cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
1337fa27ce4SDimitry Andric "unswitch on them to eliminate branches that are "
1347fa27ce4SDimitry Andric "not-taken 1/<this option> times or less."),
1357fa27ce4SDimitry Andric cl::init(16));
1367fa27ce4SDimitry Andric
137ac9a064cSDimitry Andric AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
138e3b55780SDimitry Andric namespace {
1397fa27ce4SDimitry Andric struct CompareDesc {
1407fa27ce4SDimitry Andric BranchInst *Term;
1417fa27ce4SDimitry Andric Value *Invariant;
1427fa27ce4SDimitry Andric BasicBlock *InLoopSucc;
1437fa27ce4SDimitry Andric
CompareDesc__anon10602aa10111::CompareDesc1447fa27ce4SDimitry Andric CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
1457fa27ce4SDimitry Andric : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
1467fa27ce4SDimitry Andric };
1477fa27ce4SDimitry Andric
1487fa27ce4SDimitry Andric struct InjectedInvariant {
1497fa27ce4SDimitry Andric ICmpInst::Predicate Pred;
1507fa27ce4SDimitry Andric Value *LHS;
1517fa27ce4SDimitry Andric Value *RHS;
1527fa27ce4SDimitry Andric BasicBlock *InLoopSucc;
1537fa27ce4SDimitry Andric
InjectedInvariant__anon10602aa10111::InjectedInvariant1547fa27ce4SDimitry Andric InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
1557fa27ce4SDimitry Andric BasicBlock *InLoopSucc)
1567fa27ce4SDimitry Andric : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
1577fa27ce4SDimitry Andric };
1587fa27ce4SDimitry Andric
159e3b55780SDimitry Andric struct NonTrivialUnswitchCandidate {
160e3b55780SDimitry Andric Instruction *TI = nullptr;
161e3b55780SDimitry Andric TinyPtrVector<Value *> Invariants;
162e3b55780SDimitry Andric std::optional<InstructionCost> Cost;
1637fa27ce4SDimitry Andric std::optional<InjectedInvariant> PendingInjection;
NonTrivialUnswitchCandidate__anon10602aa10111::NonTrivialUnswitchCandidate164e3b55780SDimitry Andric NonTrivialUnswitchCandidate(
165e3b55780SDimitry Andric Instruction *TI, ArrayRef<Value *> Invariants,
1667fa27ce4SDimitry Andric std::optional<InstructionCost> Cost = std::nullopt,
1677fa27ce4SDimitry Andric std::optional<InjectedInvariant> PendingInjection = std::nullopt)
1687fa27ce4SDimitry Andric : TI(TI), Invariants(Invariants), Cost(Cost),
1697fa27ce4SDimitry Andric PendingInjection(PendingInjection) {};
1707fa27ce4SDimitry Andric
hasPendingInjection__anon10602aa10111::NonTrivialUnswitchCandidate1717fa27ce4SDimitry Andric bool hasPendingInjection() const { return PendingInjection.has_value(); }
172e3b55780SDimitry Andric };
173e3b55780SDimitry Andric } // end anonymous namespace.
174e3b55780SDimitry Andric
175145449b1SDimitry Andric // Helper to skip (select x, true, false), which matches both a logical AND and
176145449b1SDimitry Andric // OR and can confuse code that tries to determine if \p Cond is either a
177145449b1SDimitry Andric // logical AND or OR but not both.
skipTrivialSelect(Value * Cond)178145449b1SDimitry Andric static Value *skipTrivialSelect(Value *Cond) {
179145449b1SDimitry Andric Value *CondNext;
180145449b1SDimitry Andric while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
181145449b1SDimitry Andric Cond = CondNext;
182145449b1SDimitry Andric return Cond;
183145449b1SDimitry Andric }
184145449b1SDimitry Andric
185eb11fae6SDimitry Andric /// Collect all of the loop invariant input values transitively used by the
186eb11fae6SDimitry Andric /// homogeneous instruction graph from a given root.
187eb11fae6SDimitry Andric ///
188eb11fae6SDimitry Andric /// This essentially walks from a root recursively through loop variant operands
189145449b1SDimitry Andric /// which have perform the same logical operation (AND or OR) and finds all
190145449b1SDimitry Andric /// inputs which are loop invariant. For some operations these can be
191145449b1SDimitry Andric /// re-associated and unswitched out of the loop entirely.
192eb11fae6SDimitry Andric static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(const Loop & L,Instruction & Root,const LoopInfo & LI)193e3b55780SDimitry Andric collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
194e3b55780SDimitry Andric const LoopInfo &LI) {
195eb11fae6SDimitry Andric assert(!L.isLoopInvariant(&Root) &&
196eb11fae6SDimitry Andric "Only need to walk the graph if root itself is not invariant.");
197eb11fae6SDimitry Andric TinyPtrVector<Value *> Invariants;
198eb11fae6SDimitry Andric
199344a3780SDimitry Andric bool IsRootAnd = match(&Root, m_LogicalAnd());
200344a3780SDimitry Andric bool IsRootOr = match(&Root, m_LogicalOr());
201344a3780SDimitry Andric
202eb11fae6SDimitry Andric // Build a worklist and recurse through operators collecting invariants.
203eb11fae6SDimitry Andric SmallVector<Instruction *, 4> Worklist;
204eb11fae6SDimitry Andric SmallPtrSet<Instruction *, 8> Visited;
205eb11fae6SDimitry Andric Worklist.push_back(&Root);
206eb11fae6SDimitry Andric Visited.insert(&Root);
207eb11fae6SDimitry Andric do {
208eb11fae6SDimitry Andric Instruction &I = *Worklist.pop_back_val();
209eb11fae6SDimitry Andric for (Value *OpV : I.operand_values()) {
210eb11fae6SDimitry Andric // Skip constants as unswitching isn't interesting for them.
211eb11fae6SDimitry Andric if (isa<Constant>(OpV))
212eb11fae6SDimitry Andric continue;
213eb11fae6SDimitry Andric
214eb11fae6SDimitry Andric // Add it to our result if loop invariant.
215eb11fae6SDimitry Andric if (L.isLoopInvariant(OpV)) {
216eb11fae6SDimitry Andric Invariants.push_back(OpV);
217eb11fae6SDimitry Andric continue;
218eb11fae6SDimitry Andric }
219eb11fae6SDimitry Andric
220eb11fae6SDimitry Andric // If not an instruction with the same opcode, nothing we can do.
221145449b1SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV));
222eb11fae6SDimitry Andric
223344a3780SDimitry Andric if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
224344a3780SDimitry Andric (IsRootOr && match(OpI, m_LogicalOr())))) {
225eb11fae6SDimitry Andric // Visit this operand.
226eb11fae6SDimitry Andric if (Visited.insert(OpI).second)
227eb11fae6SDimitry Andric Worklist.push_back(OpI);
228eb11fae6SDimitry Andric }
229344a3780SDimitry Andric }
230eb11fae6SDimitry Andric } while (!Worklist.empty());
231eb11fae6SDimitry Andric
232eb11fae6SDimitry Andric return Invariants;
233eb11fae6SDimitry Andric }
234eb11fae6SDimitry Andric
replaceLoopInvariantUses(const Loop & L,Value * Invariant,Constant & Replacement)235e3b55780SDimitry Andric static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
236a303c417SDimitry Andric Constant &Replacement) {
237eb11fae6SDimitry Andric assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
238a303c417SDimitry Andric
239a303c417SDimitry Andric // Replace uses of LIC in the loop with the given constant.
240344a3780SDimitry Andric // We use make_early_inc_range as set invalidates the iterator.
241344a3780SDimitry Andric for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
242344a3780SDimitry Andric Instruction *UserI = dyn_cast<Instruction>(U.getUser());
243a303c417SDimitry Andric
244a303c417SDimitry Andric // Replace this use within the loop body.
245eb11fae6SDimitry Andric if (UserI && L.contains(UserI))
246344a3780SDimitry Andric U.set(&Replacement);
247a303c417SDimitry Andric }
248a303c417SDimitry Andric }
249a303c417SDimitry Andric
2506b3f41edSDimitry Andric /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
2516b3f41edSDimitry Andric /// incoming values along this edge.
areLoopExitPHIsLoopInvariant(const Loop & L,const BasicBlock & ExitingBB,const BasicBlock & ExitBB)252e3b55780SDimitry Andric static bool areLoopExitPHIsLoopInvariant(const Loop &L,
253e3b55780SDimitry Andric const BasicBlock &ExitingBB,
254e3b55780SDimitry Andric const BasicBlock &ExitBB) {
255e3b55780SDimitry Andric for (const Instruction &I : ExitBB) {
2566b3f41edSDimitry Andric auto *PN = dyn_cast<PHINode>(&I);
2576b3f41edSDimitry Andric if (!PN)
2586b3f41edSDimitry Andric // No more PHIs to check.
2596b3f41edSDimitry Andric return true;
2606b3f41edSDimitry Andric
2616b3f41edSDimitry Andric // If the incoming value for this edge isn't loop invariant the unswitch
2626b3f41edSDimitry Andric // won't be trivial.
2636b3f41edSDimitry Andric if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
2646b3f41edSDimitry Andric return false;
2656b3f41edSDimitry Andric }
2666b3f41edSDimitry Andric llvm_unreachable("Basic blocks should never be empty!");
2676b3f41edSDimitry Andric }
2686b3f41edSDimitry Andric
269344a3780SDimitry Andric /// Copy a set of loop invariant values \p ToDuplicate and insert them at the
270344a3780SDimitry Andric /// end of \p BB and conditionally branch on the copied condition. We only
271344a3780SDimitry Andric /// branch on a single value.
buildPartialUnswitchConditionalBranch(BasicBlock & BB,ArrayRef<Value * > Invariants,bool Direction,BasicBlock & UnswitchedSucc,BasicBlock & NormalSucc,bool InsertFreeze,const Instruction * I,AssumptionCache * AC,const DominatorTree & DT)272c0981da4SDimitry Andric static void buildPartialUnswitchConditionalBranch(
273c0981da4SDimitry Andric BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
274145449b1SDimitry Andric BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
275e3b55780SDimitry Andric const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
276eb11fae6SDimitry Andric IRBuilder<> IRB(&BB);
277eb11fae6SDimitry Andric
278145449b1SDimitry Andric SmallVector<Value *> FrozenInvariants;
279145449b1SDimitry Andric for (Value *Inv : Invariants) {
280145449b1SDimitry Andric if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT))
281145449b1SDimitry Andric Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr");
282145449b1SDimitry Andric FrozenInvariants.push_back(Inv);
283145449b1SDimitry Andric }
284145449b1SDimitry Andric
285145449b1SDimitry Andric Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
286145449b1SDimitry Andric : IRB.CreateAnd(FrozenInvariants);
287eb11fae6SDimitry Andric IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
288eb11fae6SDimitry Andric Direction ? &NormalSucc : &UnswitchedSucc);
289eb11fae6SDimitry Andric }
290eb11fae6SDimitry Andric
291344a3780SDimitry Andric /// Copy a set of loop invariant values, and conditionally branch on them.
buildPartialInvariantUnswitchConditionalBranch(BasicBlock & BB,ArrayRef<Value * > ToDuplicate,bool Direction,BasicBlock & UnswitchedSucc,BasicBlock & NormalSucc,Loop & L,MemorySSAUpdater * MSSAU)292344a3780SDimitry Andric static void buildPartialInvariantUnswitchConditionalBranch(
293344a3780SDimitry Andric BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
294344a3780SDimitry Andric BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
295344a3780SDimitry Andric MemorySSAUpdater *MSSAU) {
296344a3780SDimitry Andric ValueToValueMapTy VMap;
297344a3780SDimitry Andric for (auto *Val : reverse(ToDuplicate)) {
298344a3780SDimitry Andric Instruction *Inst = cast<Instruction>(Val);
299344a3780SDimitry Andric Instruction *NewInst = Inst->clone();
300e3b55780SDimitry Andric NewInst->insertInto(&BB, BB.end());
301344a3780SDimitry Andric RemapInstruction(NewInst, VMap,
302344a3780SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
303344a3780SDimitry Andric VMap[Val] = NewInst;
304344a3780SDimitry Andric
305344a3780SDimitry Andric if (!MSSAU)
306344a3780SDimitry Andric continue;
307344a3780SDimitry Andric
308344a3780SDimitry Andric MemorySSA *MSSA = MSSAU->getMemorySSA();
309344a3780SDimitry Andric if (auto *MemUse =
310344a3780SDimitry Andric dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
311344a3780SDimitry Andric auto *DefiningAccess = MemUse->getDefiningAccess();
312344a3780SDimitry Andric // Get the first defining access before the loop.
313344a3780SDimitry Andric while (L.contains(DefiningAccess->getBlock())) {
314344a3780SDimitry Andric // If the defining access is a MemoryPhi, get the incoming
315344a3780SDimitry Andric // value for the pre-header as defining access.
316344a3780SDimitry Andric if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
317344a3780SDimitry Andric DefiningAccess =
318344a3780SDimitry Andric MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
319344a3780SDimitry Andric else
320344a3780SDimitry Andric DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
321344a3780SDimitry Andric }
322344a3780SDimitry Andric MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
323344a3780SDimitry Andric NewInst->getParent(),
324344a3780SDimitry Andric MemorySSA::BeforeTerminator);
325344a3780SDimitry Andric }
326344a3780SDimitry Andric }
327344a3780SDimitry Andric
328344a3780SDimitry Andric IRBuilder<> IRB(&BB);
329344a3780SDimitry Andric Value *Cond = VMap[ToDuplicate[0]];
330344a3780SDimitry Andric IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
331344a3780SDimitry Andric Direction ? &NormalSucc : &UnswitchedSucc);
332344a3780SDimitry Andric }
333344a3780SDimitry Andric
3346b3f41edSDimitry Andric /// Rewrite the PHI nodes in an unswitched loop exit basic block.
3356b3f41edSDimitry Andric ///
3366b3f41edSDimitry Andric /// Requires that the loop exit and unswitched basic block are the same, and
3376b3f41edSDimitry Andric /// that the exiting block was a unique predecessor of that block. Rewrites the
3386b3f41edSDimitry Andric /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
3396b3f41edSDimitry Andric /// PHI nodes from the old preheader that now contains the unswitched
3406b3f41edSDimitry Andric /// terminator.
rewritePHINodesForUnswitchedExitBlock(BasicBlock & UnswitchedBB,BasicBlock & OldExitingBB,BasicBlock & OldPH)3416b3f41edSDimitry Andric static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
3426b3f41edSDimitry Andric BasicBlock &OldExitingBB,
3436b3f41edSDimitry Andric BasicBlock &OldPH) {
344eb11fae6SDimitry Andric for (PHINode &PN : UnswitchedBB.phis()) {
3456b3f41edSDimitry Andric // When the loop exit is directly unswitched we just need to update the
3466b3f41edSDimitry Andric // incoming basic block. We loop to handle weird cases with repeated
3476b3f41edSDimitry Andric // incoming blocks, but expect to typically only have one operand here.
348eb11fae6SDimitry Andric for (auto i : seq<int>(0, PN.getNumOperands())) {
349eb11fae6SDimitry Andric assert(PN.getIncomingBlock(i) == &OldExitingBB &&
3506b3f41edSDimitry Andric "Found incoming block different from unique predecessor!");
351eb11fae6SDimitry Andric PN.setIncomingBlock(i, &OldPH);
3526b3f41edSDimitry Andric }
3536b3f41edSDimitry Andric }
3546b3f41edSDimitry Andric }
3556b3f41edSDimitry Andric
3566b3f41edSDimitry Andric /// Rewrite the PHI nodes in the loop exit basic block and the split off
3576b3f41edSDimitry Andric /// unswitched block.
3586b3f41edSDimitry Andric ///
3596b3f41edSDimitry Andric /// Because the exit block remains an exit from the loop, this rewrites the
3606b3f41edSDimitry Andric /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
3616b3f41edSDimitry Andric /// nodes into the unswitched basic block to select between the value in the
3626b3f41edSDimitry Andric /// old preheader and the loop exit.
rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock & ExitBB,BasicBlock & UnswitchedBB,BasicBlock & OldExitingBB,BasicBlock & OldPH,bool FullUnswitch)3636b3f41edSDimitry Andric static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
3646b3f41edSDimitry Andric BasicBlock &UnswitchedBB,
3656b3f41edSDimitry Andric BasicBlock &OldExitingBB,
366eb11fae6SDimitry Andric BasicBlock &OldPH,
367eb11fae6SDimitry Andric bool FullUnswitch) {
3686b3f41edSDimitry Andric assert(&ExitBB != &UnswitchedBB &&
3696b3f41edSDimitry Andric "Must have different loop exit and unswitched blocks!");
370b1c73532SDimitry Andric BasicBlock::iterator InsertPt = UnswitchedBB.begin();
371eb11fae6SDimitry Andric for (PHINode &PN : ExitBB.phis()) {
372eb11fae6SDimitry Andric auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
373b1c73532SDimitry Andric PN.getName() + ".split");
374b1c73532SDimitry Andric NewPN->insertBefore(InsertPt);
3756b3f41edSDimitry Andric
3766b3f41edSDimitry Andric // Walk backwards over the old PHI node's inputs to minimize the cost of
3776b3f41edSDimitry Andric // removing each one. We have to do this weird loop manually so that we
3786b3f41edSDimitry Andric // create the same number of new incoming edges in the new PHI as we expect
3796b3f41edSDimitry Andric // each case-based edge to be included in the unswitched switch in some
3806b3f41edSDimitry Andric // cases.
3816b3f41edSDimitry Andric // FIXME: This is really, really gross. It would be much cleaner if LLVM
3826b3f41edSDimitry Andric // allowed us to create a single entry for a predecessor block without
3836b3f41edSDimitry Andric // having separate entries for each "edge" even though these edges are
3846b3f41edSDimitry Andric // required to produce identical results.
385eb11fae6SDimitry Andric for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
386eb11fae6SDimitry Andric if (PN.getIncomingBlock(i) != &OldExitingBB)
3876b3f41edSDimitry Andric continue;
3886b3f41edSDimitry Andric
389eb11fae6SDimitry Andric Value *Incoming = PN.getIncomingValue(i);
390eb11fae6SDimitry Andric if (FullUnswitch)
391eb11fae6SDimitry Andric // No more edge from the old exiting block to the exit block.
392eb11fae6SDimitry Andric PN.removeIncomingValue(i);
393eb11fae6SDimitry Andric
3946b3f41edSDimitry Andric NewPN->addIncoming(Incoming, &OldPH);
3956b3f41edSDimitry Andric }
3966b3f41edSDimitry Andric
3976b3f41edSDimitry Andric // Now replace the old PHI with the new one and wire the old one in as an
3986b3f41edSDimitry Andric // input to the new one.
399eb11fae6SDimitry Andric PN.replaceAllUsesWith(NewPN);
400eb11fae6SDimitry Andric NewPN->addIncoming(&PN, &ExitBB);
401eb11fae6SDimitry Andric }
402eb11fae6SDimitry Andric }
403eb11fae6SDimitry Andric
404eb11fae6SDimitry Andric /// Hoist the current loop up to the innermost loop containing a remaining exit.
405eb11fae6SDimitry Andric ///
406eb11fae6SDimitry Andric /// Because we've removed an exit from the loop, we may have changed the set of
407eb11fae6SDimitry Andric /// loops reachable and need to move the current loop up the loop nest or even
408eb11fae6SDimitry Andric /// to an entirely separate nest.
hoistLoopToNewParent(Loop & L,BasicBlock & Preheader,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU,ScalarEvolution * SE)409eb11fae6SDimitry Andric static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
410e6d15924SDimitry Andric DominatorTree &DT, LoopInfo &LI,
411706b4fc4SDimitry Andric MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
412eb11fae6SDimitry Andric // If the loop is already at the top level, we can't hoist it anywhere.
413eb11fae6SDimitry Andric Loop *OldParentL = L.getParentLoop();
414eb11fae6SDimitry Andric if (!OldParentL)
415eb11fae6SDimitry Andric return;
416eb11fae6SDimitry Andric
417eb11fae6SDimitry Andric SmallVector<BasicBlock *, 4> Exits;
418eb11fae6SDimitry Andric L.getExitBlocks(Exits);
419eb11fae6SDimitry Andric Loop *NewParentL = nullptr;
420eb11fae6SDimitry Andric for (auto *ExitBB : Exits)
421eb11fae6SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB))
422eb11fae6SDimitry Andric if (!NewParentL || NewParentL->contains(ExitL))
423eb11fae6SDimitry Andric NewParentL = ExitL;
424eb11fae6SDimitry Andric
425eb11fae6SDimitry Andric if (NewParentL == OldParentL)
426eb11fae6SDimitry Andric return;
427eb11fae6SDimitry Andric
428eb11fae6SDimitry Andric // The new parent loop (if different) should always contain the old one.
429eb11fae6SDimitry Andric if (NewParentL)
430eb11fae6SDimitry Andric assert(NewParentL->contains(OldParentL) &&
431eb11fae6SDimitry Andric "Can only hoist this loop up the nest!");
432eb11fae6SDimitry Andric
433eb11fae6SDimitry Andric // The preheader will need to move with the body of this loop. However,
434eb11fae6SDimitry Andric // because it isn't in this loop we also need to update the primary loop map.
435eb11fae6SDimitry Andric assert(OldParentL == LI.getLoopFor(&Preheader) &&
436eb11fae6SDimitry Andric "Parent loop of this loop should contain this loop's preheader!");
437eb11fae6SDimitry Andric LI.changeLoopFor(&Preheader, NewParentL);
438eb11fae6SDimitry Andric
439eb11fae6SDimitry Andric // Remove this loop from its old parent.
440eb11fae6SDimitry Andric OldParentL->removeChildLoop(&L);
441eb11fae6SDimitry Andric
442eb11fae6SDimitry Andric // Add the loop either to the new parent or as a top-level loop.
443eb11fae6SDimitry Andric if (NewParentL)
444eb11fae6SDimitry Andric NewParentL->addChildLoop(&L);
445eb11fae6SDimitry Andric else
446eb11fae6SDimitry Andric LI.addTopLevelLoop(&L);
447eb11fae6SDimitry Andric
448eb11fae6SDimitry Andric // Remove this loops blocks from the old parent and every other loop up the
449eb11fae6SDimitry Andric // nest until reaching the new parent. Also update all of these
450eb11fae6SDimitry Andric // no-longer-containing loops to reflect the nesting change.
451eb11fae6SDimitry Andric for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
452eb11fae6SDimitry Andric OldContainingL = OldContainingL->getParentLoop()) {
453eb11fae6SDimitry Andric llvm::erase_if(OldContainingL->getBlocksVector(),
454eb11fae6SDimitry Andric [&](const BasicBlock *BB) {
455eb11fae6SDimitry Andric return BB == &Preheader || L.contains(BB);
456eb11fae6SDimitry Andric });
457eb11fae6SDimitry Andric
458eb11fae6SDimitry Andric OldContainingL->getBlocksSet().erase(&Preheader);
459eb11fae6SDimitry Andric for (BasicBlock *BB : L.blocks())
460eb11fae6SDimitry Andric OldContainingL->getBlocksSet().erase(BB);
461eb11fae6SDimitry Andric
462eb11fae6SDimitry Andric // Because we just hoisted a loop out of this one, we have essentially
463eb11fae6SDimitry Andric // created new exit paths from it. That means we need to form LCSSA PHI
464eb11fae6SDimitry Andric // nodes for values used in the no-longer-nested loop.
465706b4fc4SDimitry Andric formLCSSA(*OldContainingL, DT, &LI, SE);
466eb11fae6SDimitry Andric
467eb11fae6SDimitry Andric // We shouldn't need to form dedicated exits because the exit introduced
468d8e91e46SDimitry Andric // here is the (just split by unswitching) preheader. However, after trivial
469d8e91e46SDimitry Andric // unswitching it is possible to get new non-dedicated exits out of parent
470d8e91e46SDimitry Andric // loop so let's conservatively form dedicated exit blocks and figure out
471d8e91e46SDimitry Andric // if we can optimize later.
472e6d15924SDimitry Andric formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
473e6d15924SDimitry Andric /*PreserveLCSSA*/ true);
4746b3f41edSDimitry Andric }
4756b3f41edSDimitry Andric }
4766b3f41edSDimitry Andric
477706b4fc4SDimitry Andric // Return the top-most loop containing ExitBB and having ExitBB as exiting block
478706b4fc4SDimitry Andric // or the loop containing ExitBB, if there is no parent loop containing ExitBB
479706b4fc4SDimitry Andric // as exiting block.
getTopMostExitingLoop(const BasicBlock * ExitBB,const LoopInfo & LI)4807fa27ce4SDimitry Andric static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
481e3b55780SDimitry Andric const LoopInfo &LI) {
4827fa27ce4SDimitry Andric Loop *TopMost = LI.getLoopFor(ExitBB);
4837fa27ce4SDimitry Andric Loop *Current = TopMost;
484706b4fc4SDimitry Andric while (Current) {
485706b4fc4SDimitry Andric if (Current->isLoopExiting(ExitBB))
486706b4fc4SDimitry Andric TopMost = Current;
487706b4fc4SDimitry Andric Current = Current->getParentLoop();
488706b4fc4SDimitry Andric }
489706b4fc4SDimitry Andric return TopMost;
490706b4fc4SDimitry Andric }
491706b4fc4SDimitry Andric
492a303c417SDimitry Andric /// Unswitch a trivial branch if the condition is loop invariant.
493a303c417SDimitry Andric ///
494a303c417SDimitry Andric /// This routine should only be called when loop code leading to the branch has
495a303c417SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the
496a303c417SDimitry Andric /// condition is invariant and one of the successors is a loop exit. This
497a303c417SDimitry Andric /// allows us to unswitch without duplicating the loop, making it trivial.
498a303c417SDimitry Andric ///
499a303c417SDimitry Andric /// If this routine fails to unswitch the branch it returns false.
500a303c417SDimitry Andric ///
501a303c417SDimitry Andric /// If the branch can be unswitched, this routine splits the preheader and
502a303c417SDimitry Andric /// hoists the branch above that split. Preserves loop simplified form
503a303c417SDimitry Andric /// (splitting the exit block as necessary). It simplifies the branch within
504a303c417SDimitry Andric /// the loop to an unconditional branch but doesn't remove it entirely. Further
505344a3780SDimitry Andric /// cleanup can be done with some simplifycfg like pass.
506eb11fae6SDimitry Andric ///
507eb11fae6SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
508eb11fae6SDimitry Andric /// invalidated by this.
unswitchTrivialBranch(Loop & L,BranchInst & BI,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)509a303c417SDimitry Andric static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
510d8e91e46SDimitry Andric LoopInfo &LI, ScalarEvolution *SE,
511d8e91e46SDimitry Andric MemorySSAUpdater *MSSAU) {
512a303c417SDimitry Andric assert(BI.isConditional() && "Can only unswitch a conditional branch!");
513eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
514a303c417SDimitry Andric
515eb11fae6SDimitry Andric // The loop invariant values that we want to unswitch.
516eb11fae6SDimitry Andric TinyPtrVector<Value *> Invariants;
517a303c417SDimitry Andric
518eb11fae6SDimitry Andric // When true, we're fully unswitching the branch rather than just unswitching
519eb11fae6SDimitry Andric // some input conditions to the branch.
520eb11fae6SDimitry Andric bool FullUnswitch = false;
521eb11fae6SDimitry Andric
522145449b1SDimitry Andric Value *Cond = skipTrivialSelect(BI.getCondition());
523145449b1SDimitry Andric if (L.isLoopInvariant(Cond)) {
524145449b1SDimitry Andric Invariants.push_back(Cond);
525eb11fae6SDimitry Andric FullUnswitch = true;
526eb11fae6SDimitry Andric } else {
527145449b1SDimitry Andric if (auto *CondInst = dyn_cast<Instruction>(Cond))
528eb11fae6SDimitry Andric Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
529344a3780SDimitry Andric if (Invariants.empty()) {
530344a3780SDimitry Andric LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
531a303c417SDimitry Andric return false;
532eb11fae6SDimitry Andric }
533344a3780SDimitry Andric }
534a303c417SDimitry Andric
535eb11fae6SDimitry Andric // Check that one of the branch's successors exits, and which one.
536eb11fae6SDimitry Andric bool ExitDirection = true;
537a303c417SDimitry Andric int LoopExitSuccIdx = 0;
538a303c417SDimitry Andric auto *LoopExitBB = BI.getSuccessor(0);
539eb11fae6SDimitry Andric if (L.contains(LoopExitBB)) {
540eb11fae6SDimitry Andric ExitDirection = false;
541a303c417SDimitry Andric LoopExitSuccIdx = 1;
542a303c417SDimitry Andric LoopExitBB = BI.getSuccessor(1);
543344a3780SDimitry Andric if (L.contains(LoopExitBB)) {
544344a3780SDimitry Andric LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
545a303c417SDimitry Andric return false;
546a303c417SDimitry Andric }
547344a3780SDimitry Andric }
548a303c417SDimitry Andric auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
5496b3f41edSDimitry Andric auto *ParentBB = BI.getParent();
550344a3780SDimitry Andric if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
551344a3780SDimitry Andric LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
552a303c417SDimitry Andric return false;
553344a3780SDimitry Andric }
554a303c417SDimitry Andric
555eb11fae6SDimitry Andric // When unswitching only part of the branch's condition, we need the exit
556eb11fae6SDimitry Andric // block to be reached directly from the partially unswitched input. This can
557eb11fae6SDimitry Andric // be done when the exit block is along the true edge and the branch condition
558eb11fae6SDimitry Andric // is a graph of `or` operations, or the exit block is along the false edge
559eb11fae6SDimitry Andric // and the condition is a graph of `and` operations.
560eb11fae6SDimitry Andric if (!FullUnswitch) {
561145449b1SDimitry Andric if (ExitDirection ? !match(Cond, m_LogicalOr())
562145449b1SDimitry Andric : !match(Cond, m_LogicalAnd())) {
563344a3780SDimitry Andric LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
564344a3780SDimitry Andric "non-full unswitch!\n");
565eb11fae6SDimitry Andric return false;
566eb11fae6SDimitry Andric }
567eb11fae6SDimitry Andric }
568eb11fae6SDimitry Andric
569eb11fae6SDimitry Andric LLVM_DEBUG({
570eb11fae6SDimitry Andric dbgs() << " unswitching trivial invariant conditions for: " << BI
571eb11fae6SDimitry Andric << "\n";
572eb11fae6SDimitry Andric for (Value *Invariant : Invariants) {
573eb11fae6SDimitry Andric dbgs() << " " << *Invariant << " == true";
574eb11fae6SDimitry Andric if (Invariant != Invariants.back())
575eb11fae6SDimitry Andric dbgs() << " ||";
576eb11fae6SDimitry Andric dbgs() << "\n";
577eb11fae6SDimitry Andric }
578eb11fae6SDimitry Andric });
579eb11fae6SDimitry Andric
580eb11fae6SDimitry Andric // If we have scalar evolutions, we need to invalidate them including this
581706b4fc4SDimitry Andric // loop, the loop containing the exit block and the topmost parent loop
582706b4fc4SDimitry Andric // exiting via LoopExitBB.
583eb11fae6SDimitry Andric if (SE) {
584e3b55780SDimitry Andric if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
585eb11fae6SDimitry Andric SE->forgetLoop(ExitL);
586eb11fae6SDimitry Andric else
587eb11fae6SDimitry Andric // Forget the entire nest as this exits the entire nest.
588eb11fae6SDimitry Andric SE->forgetTopmostLoop(&L);
589e3b55780SDimitry Andric SE->forgetBlockAndLoopDispositions();
590eb11fae6SDimitry Andric }
591a303c417SDimitry Andric
592d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
593d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
594d8e91e46SDimitry Andric
595a303c417SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert
596a303c417SDimitry Andric // the conditional branch. We will change the preheader to have a conditional
597a303c417SDimitry Andric // branch on LoopCond.
598a303c417SDimitry Andric BasicBlock *OldPH = L.getLoopPreheader();
599d8e91e46SDimitry Andric BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
600a303c417SDimitry Andric
601a303c417SDimitry Andric // Now that we have a place to insert the conditional branch, create a place
602a303c417SDimitry Andric // to branch to: this is the exit block out of the loop that we are
603a303c417SDimitry Andric // unswitching. We need to split this if there are other loop predecessors.
604a303c417SDimitry Andric // Because the loop is in simplified form, *any* other predecessor is enough.
605a303c417SDimitry Andric BasicBlock *UnswitchedBB;
606eb11fae6SDimitry Andric if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
607eb11fae6SDimitry Andric assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
6086b3f41edSDimitry Andric "A branch's parent isn't a predecessor!");
609a303c417SDimitry Andric UnswitchedBB = LoopExitBB;
610a303c417SDimitry Andric } else {
611d8e91e46SDimitry Andric UnswitchedBB =
612b1c73532SDimitry Andric SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false);
613a303c417SDimitry Andric }
614a303c417SDimitry Andric
615d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
616d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
617d8e91e46SDimitry Andric
618eb11fae6SDimitry Andric // Actually move the invariant uses into the unswitched position. If possible,
619eb11fae6SDimitry Andric // we do this by moving the instructions, but when doing partial unswitching
620eb11fae6SDimitry Andric // we do it by building a new merge of the values in the unswitched position.
621a303c417SDimitry Andric OldPH->getTerminator()->eraseFromParent();
622eb11fae6SDimitry Andric if (FullUnswitch) {
623eb11fae6SDimitry Andric // If fully unswitching, we can use the existing branch instruction.
624eb11fae6SDimitry Andric // Splice it into the old PH to gate reaching the new preheader and re-point
625eb11fae6SDimitry Andric // its successors.
626b1c73532SDimitry Andric BI.moveBefore(*OldPH, OldPH->end());
627145449b1SDimitry Andric BI.setCondition(Cond);
628d8e91e46SDimitry Andric if (MSSAU) {
629d8e91e46SDimitry Andric // Temporarily clone the terminator, to make MSSA update cheaper by
630d8e91e46SDimitry Andric // separating "insert edge" updates from "remove edge" ones.
631e3b55780SDimitry Andric BI.clone()->insertInto(ParentBB, ParentBB->end());
632d8e91e46SDimitry Andric } else {
633a303c417SDimitry Andric // Create a new unconditional branch that will continue the loop as a new
634a303c417SDimitry Andric // terminator.
635ac9a064cSDimitry Andric Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
636ac9a064cSDimitry Andric NewBI->setDebugLoc(BI.getDebugLoc());
637d8e91e46SDimitry Andric }
638d8e91e46SDimitry Andric BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
639d8e91e46SDimitry Andric BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
640eb11fae6SDimitry Andric } else {
641eb11fae6SDimitry Andric // Only unswitching a subset of inputs to the condition, so we will need to
642eb11fae6SDimitry Andric // build a new branch that merges the invariant inputs.
643eb11fae6SDimitry Andric if (ExitDirection)
644145449b1SDimitry Andric assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) &&
645344a3780SDimitry Andric "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
646344a3780SDimitry Andric "condition!");
647eb11fae6SDimitry Andric else
648145449b1SDimitry Andric assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) &&
649344a3780SDimitry Andric "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
650344a3780SDimitry Andric " condition!");
651145449b1SDimitry Andric buildPartialUnswitchConditionalBranch(
652145449b1SDimitry Andric *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
653145449b1SDimitry Andric FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
654eb11fae6SDimitry Andric }
655a303c417SDimitry Andric
656d8e91e46SDimitry Andric // Update the dominator tree with the added edge.
657d8e91e46SDimitry Andric DT.insertEdge(OldPH, UnswitchedBB);
658d8e91e46SDimitry Andric
659d8e91e46SDimitry Andric // After the dominator tree was updated with the added edge, update MemorySSA
660d8e91e46SDimitry Andric // if available.
661d8e91e46SDimitry Andric if (MSSAU) {
662d8e91e46SDimitry Andric SmallVector<CFGUpdate, 1> Updates;
663d8e91e46SDimitry Andric Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
664d8e91e46SDimitry Andric MSSAU->applyInsertUpdates(Updates, DT);
665d8e91e46SDimitry Andric }
666d8e91e46SDimitry Andric
667d8e91e46SDimitry Andric // Finish updating dominator tree and memory ssa for full unswitch.
668d8e91e46SDimitry Andric if (FullUnswitch) {
669d8e91e46SDimitry Andric if (MSSAU) {
670ac9a064cSDimitry Andric Instruction *Term = ParentBB->getTerminator();
671ac9a064cSDimitry Andric // Remove the cloned branch instruction and create unconditional branch
672ac9a064cSDimitry Andric // now.
673ac9a064cSDimitry Andric Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
674ac9a064cSDimitry Andric NewBI->setDebugLoc(Term->getDebugLoc());
675ac9a064cSDimitry Andric Term->eraseFromParent();
676d8e91e46SDimitry Andric MSSAU->removeEdge(ParentBB, LoopExitBB);
677d8e91e46SDimitry Andric }
678d8e91e46SDimitry Andric DT.deleteEdge(ParentBB, LoopExitBB);
679d8e91e46SDimitry Andric }
680d8e91e46SDimitry Andric
681d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
682d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
683d8e91e46SDimitry Andric
6846b3f41edSDimitry Andric // Rewrite the relevant PHI nodes.
6856b3f41edSDimitry Andric if (UnswitchedBB == LoopExitBB)
6866b3f41edSDimitry Andric rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
6876b3f41edSDimitry Andric else
6886b3f41edSDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
689eb11fae6SDimitry Andric *ParentBB, *OldPH, FullUnswitch);
6906b3f41edSDimitry Andric
691eb11fae6SDimitry Andric // The constant we can replace all of our invariants with inside the loop
692eb11fae6SDimitry Andric // body. If any of the invariants have a value other than this the loop won't
693eb11fae6SDimitry Andric // be entered.
694eb11fae6SDimitry Andric ConstantInt *Replacement = ExitDirection
695eb11fae6SDimitry Andric ? ConstantInt::getFalse(BI.getContext())
696eb11fae6SDimitry Andric : ConstantInt::getTrue(BI.getContext());
697a303c417SDimitry Andric
698a303c417SDimitry Andric // Since this is an i1 condition we can also trivially replace uses of it
699a303c417SDimitry Andric // within the loop with a constant.
700eb11fae6SDimitry Andric for (Value *Invariant : Invariants)
701eb11fae6SDimitry Andric replaceLoopInvariantUses(L, Invariant, *Replacement);
702eb11fae6SDimitry Andric
703eb11fae6SDimitry Andric // If this was full unswitching, we may have changed the nesting relationship
704eb11fae6SDimitry Andric // for this loop so hoist it to its correct parent if needed.
705eb11fae6SDimitry Andric if (FullUnswitch)
706706b4fc4SDimitry Andric hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
707e6d15924SDimitry Andric
708e6d15924SDimitry Andric if (MSSAU && VerifyMemorySSA)
709e6d15924SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
710a303c417SDimitry Andric
711d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
712a303c417SDimitry Andric ++NumTrivial;
713a303c417SDimitry Andric ++NumBranches;
714a303c417SDimitry Andric return true;
715a303c417SDimitry Andric }
716a303c417SDimitry Andric
717a303c417SDimitry Andric /// Unswitch a trivial switch if the condition is loop invariant.
718a303c417SDimitry Andric ///
719a303c417SDimitry Andric /// This routine should only be called when loop code leading to the switch has
720a303c417SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the
721a303c417SDimitry Andric /// condition is invariant and that at least one of the successors is a loop
722a303c417SDimitry Andric /// exit. This allows us to unswitch without duplicating the loop, making it
723a303c417SDimitry Andric /// trivial.
724a303c417SDimitry Andric ///
725a303c417SDimitry Andric /// If this routine fails to unswitch the switch it returns false.
726a303c417SDimitry Andric ///
727a303c417SDimitry Andric /// If the switch can be unswitched, this routine splits the preheader and
728a303c417SDimitry Andric /// copies the switch above that split. If the default case is one of the
729a303c417SDimitry Andric /// exiting cases, it copies the non-exiting cases and points them at the new
730a303c417SDimitry Andric /// preheader. If the default case is not exiting, it copies the exiting cases
731a303c417SDimitry Andric /// and points the default at the preheader. It preserves loop simplified form
732a303c417SDimitry Andric /// (splitting the exit blocks as necessary). It simplifies the switch within
733a303c417SDimitry Andric /// the loop by removing now-dead cases. If the default case is one of those
734a303c417SDimitry Andric /// unswitched, it replaces its destination with a new basic block containing
735a303c417SDimitry Andric /// only unreachable. Such basic blocks, while technically loop exits, are not
736a303c417SDimitry Andric /// considered for unswitching so this is a stable transform and the same
737a303c417SDimitry Andric /// switch will not be revisited. If after unswitching there is only a single
738a303c417SDimitry Andric /// in-loop successor, the switch is further simplified to an unconditional
739344a3780SDimitry Andric /// branch. Still more cleanup can be done with some simplifycfg like pass.
740eb11fae6SDimitry Andric ///
741eb11fae6SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
742eb11fae6SDimitry Andric /// invalidated by this.
unswitchTrivialSwitch(Loop & L,SwitchInst & SI,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)743a303c417SDimitry Andric static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
744d8e91e46SDimitry Andric LoopInfo &LI, ScalarEvolution *SE,
745d8e91e46SDimitry Andric MemorySSAUpdater *MSSAU) {
746eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
747a303c417SDimitry Andric Value *LoopCond = SI.getCondition();
748a303c417SDimitry Andric
749a303c417SDimitry Andric // If this isn't switching on an invariant condition, we can't unswitch it.
750a303c417SDimitry Andric if (!L.isLoopInvariant(LoopCond))
751a303c417SDimitry Andric return false;
752a303c417SDimitry Andric
7536b3f41edSDimitry Andric auto *ParentBB = SI.getParent();
7546b3f41edSDimitry Andric
755cfca06d7SDimitry Andric // The same check must be used both for the default and the exit cases. We
756cfca06d7SDimitry Andric // should never leave edges from the switch instruction to a basic block that
757cfca06d7SDimitry Andric // we are unswitching, hence the condition used to determine the default case
758cfca06d7SDimitry Andric // needs to also be used to populate ExitCaseIndices, which is then used to
759cfca06d7SDimitry Andric // remove cases from the switch.
760cfca06d7SDimitry Andric auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
761cfca06d7SDimitry Andric // BBToCheck is not an exit block if it is inside loop L.
762cfca06d7SDimitry Andric if (L.contains(&BBToCheck))
763cfca06d7SDimitry Andric return false;
764cfca06d7SDimitry Andric // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
765cfca06d7SDimitry Andric if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
766cfca06d7SDimitry Andric return false;
767cfca06d7SDimitry Andric // We do not unswitch a block that only has an unreachable statement, as
768cfca06d7SDimitry Andric // it's possible this is a previously unswitched block. Only unswitch if
769cfca06d7SDimitry Andric // either the terminator is not unreachable, or, if it is, it's not the only
770cfca06d7SDimitry Andric // instruction in the block.
771cfca06d7SDimitry Andric auto *TI = BBToCheck.getTerminator();
772cfca06d7SDimitry Andric bool isUnreachable = isa<UnreachableInst>(TI);
773cfca06d7SDimitry Andric return !isUnreachable ||
774cfca06d7SDimitry Andric (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
775cfca06d7SDimitry Andric };
776cfca06d7SDimitry Andric
777a303c417SDimitry Andric SmallVector<int, 4> ExitCaseIndices;
778cfca06d7SDimitry Andric for (auto Case : SI.cases())
779cfca06d7SDimitry Andric if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
780a303c417SDimitry Andric ExitCaseIndices.push_back(Case.getCaseIndex());
781a303c417SDimitry Andric BasicBlock *DefaultExitBB = nullptr;
782e6d15924SDimitry Andric SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
783e6d15924SDimitry Andric SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
784cfca06d7SDimitry Andric if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
785a303c417SDimitry Andric DefaultExitBB = SI.getDefaultDest();
786e6d15924SDimitry Andric } else if (ExitCaseIndices.empty())
787a303c417SDimitry Andric return false;
788a303c417SDimitry Andric
789d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
790d8e91e46SDimitry Andric
791d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
792d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
793a303c417SDimitry Andric
794eb11fae6SDimitry Andric // We may need to invalidate SCEVs for the outermost loop reached by any of
795eb11fae6SDimitry Andric // the exits.
796eb11fae6SDimitry Andric Loop *OuterL = &L;
797eb11fae6SDimitry Andric
798eb11fae6SDimitry Andric if (DefaultExitBB) {
7997fa27ce4SDimitry Andric // Check the loop containing this exit.
8007fa27ce4SDimitry Andric Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
8017fa27ce4SDimitry Andric if (!ExitL || ExitL->contains(OuterL))
8027fa27ce4SDimitry Andric OuterL = ExitL;
8037fa27ce4SDimitry Andric }
8047fa27ce4SDimitry Andric for (unsigned Index : ExitCaseIndices) {
8057fa27ce4SDimitry Andric auto CaseI = SI.case_begin() + Index;
8067fa27ce4SDimitry Andric // Compute the outer loop from this exit.
8077fa27ce4SDimitry Andric Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
8087fa27ce4SDimitry Andric if (!ExitL || ExitL->contains(OuterL))
8097fa27ce4SDimitry Andric OuterL = ExitL;
8107fa27ce4SDimitry Andric }
8117fa27ce4SDimitry Andric
8127fa27ce4SDimitry Andric if (SE) {
8137fa27ce4SDimitry Andric if (OuterL)
8147fa27ce4SDimitry Andric SE->forgetLoop(OuterL);
8157fa27ce4SDimitry Andric else
8167fa27ce4SDimitry Andric SE->forgetTopmostLoop(&L);
8177fa27ce4SDimitry Andric }
8187fa27ce4SDimitry Andric
8197fa27ce4SDimitry Andric if (DefaultExitBB) {
820eb11fae6SDimitry Andric // Clear out the default destination temporarily to allow accurate
821eb11fae6SDimitry Andric // predecessor lists to be examined below.
822eb11fae6SDimitry Andric SI.setDefaultDest(nullptr);
823eb11fae6SDimitry Andric }
824eb11fae6SDimitry Andric
825eb11fae6SDimitry Andric // Store the exit cases into a separate data structure and remove them from
826eb11fae6SDimitry Andric // the switch.
827e6d15924SDimitry Andric SmallVector<std::tuple<ConstantInt *, BasicBlock *,
828e6d15924SDimitry Andric SwitchInstProfUpdateWrapper::CaseWeightOpt>,
829e6d15924SDimitry Andric 4> ExitCases;
830a303c417SDimitry Andric ExitCases.reserve(ExitCaseIndices.size());
831e6d15924SDimitry Andric SwitchInstProfUpdateWrapper SIW(SI);
832a303c417SDimitry Andric // We walk the case indices backwards so that we remove the last case first
833a303c417SDimitry Andric // and don't disrupt the earlier indices.
834a303c417SDimitry Andric for (unsigned Index : reverse(ExitCaseIndices)) {
835a303c417SDimitry Andric auto CaseI = SI.case_begin() + Index;
836a303c417SDimitry Andric // Save the value of this case.
837e6d15924SDimitry Andric auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
838e6d15924SDimitry Andric ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
839a303c417SDimitry Andric // Delete the unswitched cases.
840e6d15924SDimitry Andric SIW.removeCase(CaseI);
841a303c417SDimitry Andric }
842a303c417SDimitry Andric
843a303c417SDimitry Andric // Check if after this all of the remaining cases point at the same
844a303c417SDimitry Andric // successor.
845a303c417SDimitry Andric BasicBlock *CommonSuccBB = nullptr;
846a303c417SDimitry Andric if (SI.getNumCases() > 0 &&
847b60736ecSDimitry Andric all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
848b60736ecSDimitry Andric return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
849a303c417SDimitry Andric }))
850a303c417SDimitry Andric CommonSuccBB = SI.case_begin()->getCaseSuccessor();
851eb11fae6SDimitry Andric if (!DefaultExitBB) {
852a303c417SDimitry Andric // If we're not unswitching the default, we need it to match any cases to
853a303c417SDimitry Andric // have a common successor or if we have no cases it is the common
854a303c417SDimitry Andric // successor.
855a303c417SDimitry Andric if (SI.getNumCases() == 0)
856a303c417SDimitry Andric CommonSuccBB = SI.getDefaultDest();
857a303c417SDimitry Andric else if (SI.getDefaultDest() != CommonSuccBB)
858a303c417SDimitry Andric CommonSuccBB = nullptr;
859a303c417SDimitry Andric }
860a303c417SDimitry Andric
861a303c417SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert
862a303c417SDimitry Andric // the switch.
863a303c417SDimitry Andric BasicBlock *OldPH = L.getLoopPreheader();
864d8e91e46SDimitry Andric BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
865a303c417SDimitry Andric OldPH->getTerminator()->eraseFromParent();
866a303c417SDimitry Andric
867ac9a064cSDimitry Andric // Now add the unswitched switch. This new switch instruction inherits the
868ac9a064cSDimitry Andric // debug location of the old switch, because it semantically replace the old
869ac9a064cSDimitry Andric // one.
870a303c417SDimitry Andric auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
871ac9a064cSDimitry Andric NewSI->setDebugLoc(SIW->getDebugLoc());
872e6d15924SDimitry Andric SwitchInstProfUpdateWrapper NewSIW(*NewSI);
873a303c417SDimitry Andric
8746b3f41edSDimitry Andric // Rewrite the IR for the unswitched basic blocks. This requires two steps.
8756b3f41edSDimitry Andric // First, we split any exit blocks with remaining in-loop predecessors. Then
8766b3f41edSDimitry Andric // we update the PHIs in one of two ways depending on if there was a split.
8776b3f41edSDimitry Andric // We walk in reverse so that we split in the same order as the cases
8786b3f41edSDimitry Andric // appeared. This is purely for convenience of reading the resulting IR, but
8796b3f41edSDimitry Andric // it doesn't cost anything really.
8806b3f41edSDimitry Andric SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
881a303c417SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
882a303c417SDimitry Andric // Handle the default exit if necessary.
883a303c417SDimitry Andric // FIXME: It'd be great if we could merge this with the loop below but LLVM's
884a303c417SDimitry Andric // ranges aren't quite powerful enough yet.
8856b3f41edSDimitry Andric if (DefaultExitBB) {
8866b3f41edSDimitry Andric if (pred_empty(DefaultExitBB)) {
8876b3f41edSDimitry Andric UnswitchedExitBBs.insert(DefaultExitBB);
8886b3f41edSDimitry Andric rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
8896b3f41edSDimitry Andric } else {
890a303c417SDimitry Andric auto *SplitBB =
891b1c73532SDimitry Andric SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU);
892d8e91e46SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
893d8e91e46SDimitry Andric *ParentBB, *OldPH,
894d8e91e46SDimitry Andric /*FullUnswitch*/ true);
895a303c417SDimitry Andric DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
896a303c417SDimitry Andric }
8976b3f41edSDimitry Andric }
898a303c417SDimitry Andric // Note that we must use a reference in the for loop so that we update the
899a303c417SDimitry Andric // container.
900e6d15924SDimitry Andric for (auto &ExitCase : reverse(ExitCases)) {
901a303c417SDimitry Andric // Grab a reference to the exit block in the pair so that we can update it.
902e6d15924SDimitry Andric BasicBlock *ExitBB = std::get<1>(ExitCase);
903a303c417SDimitry Andric
904a303c417SDimitry Andric // If this case is the last edge into the exit block, we can simply reuse it
905a303c417SDimitry Andric // as it will no longer be a loop exit. No mapping necessary.
9066b3f41edSDimitry Andric if (pred_empty(ExitBB)) {
9076b3f41edSDimitry Andric // Only rewrite once.
9086b3f41edSDimitry Andric if (UnswitchedExitBBs.insert(ExitBB).second)
9096b3f41edSDimitry Andric rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
910a303c417SDimitry Andric continue;
9116b3f41edSDimitry Andric }
912a303c417SDimitry Andric
913a303c417SDimitry Andric // Otherwise we need to split the exit block so that we retain an exit
914a303c417SDimitry Andric // block from the loop and a target for the unswitched condition.
915a303c417SDimitry Andric BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
916a303c417SDimitry Andric if (!SplitExitBB) {
917a303c417SDimitry Andric // If this is the first time we see this, do the split and remember it.
918b1c73532SDimitry Andric SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
919d8e91e46SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
920d8e91e46SDimitry Andric *ParentBB, *OldPH,
921d8e91e46SDimitry Andric /*FullUnswitch*/ true);
922a303c417SDimitry Andric }
9236b3f41edSDimitry Andric // Update the case pair to point to the split block.
924e6d15924SDimitry Andric std::get<1>(ExitCase) = SplitExitBB;
925a303c417SDimitry Andric }
926a303c417SDimitry Andric
927a303c417SDimitry Andric // Now add the unswitched cases. We do this in reverse order as we built them
928a303c417SDimitry Andric // in reverse order.
929e6d15924SDimitry Andric for (auto &ExitCase : reverse(ExitCases)) {
930e6d15924SDimitry Andric ConstantInt *CaseVal = std::get<0>(ExitCase);
931e6d15924SDimitry Andric BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
932a303c417SDimitry Andric
933e6d15924SDimitry Andric NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
934a303c417SDimitry Andric }
935a303c417SDimitry Andric
936a303c417SDimitry Andric // If the default was unswitched, re-point it and add explicit cases for
937a303c417SDimitry Andric // entering the loop.
938a303c417SDimitry Andric if (DefaultExitBB) {
939e6d15924SDimitry Andric NewSIW->setDefaultDest(DefaultExitBB);
940e6d15924SDimitry Andric NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
941a303c417SDimitry Andric
942a303c417SDimitry Andric // We removed all the exit cases, so we just copy the cases to the
943a303c417SDimitry Andric // unswitched switch.
944e6d15924SDimitry Andric for (const auto &Case : SI.cases())
945e6d15924SDimitry Andric NewSIW.addCase(Case.getCaseValue(), NewPH,
946e6d15924SDimitry Andric SIW.getSuccessorWeight(Case.getSuccessorIndex()));
947e6d15924SDimitry Andric } else if (DefaultCaseWeight) {
948e6d15924SDimitry Andric // We have to set branch weight of the default case.
949e6d15924SDimitry Andric uint64_t SW = *DefaultCaseWeight;
950e6d15924SDimitry Andric for (const auto &Case : SI.cases()) {
951e6d15924SDimitry Andric auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
952e6d15924SDimitry Andric assert(W &&
953e6d15924SDimitry Andric "case weight must be defined as default case weight is defined");
954e6d15924SDimitry Andric SW += *W;
955e6d15924SDimitry Andric }
956e6d15924SDimitry Andric NewSIW.setSuccessorWeight(0, SW);
957a303c417SDimitry Andric }
958a303c417SDimitry Andric
959a303c417SDimitry Andric // If we ended up with a common successor for every path through the switch
960a303c417SDimitry Andric // after unswitching, rewrite it to an unconditional branch to make it easy
961a303c417SDimitry Andric // to recognize. Otherwise we potentially have to recognize the default case
962a303c417SDimitry Andric // pointing at unreachable and other complexity.
963a303c417SDimitry Andric if (CommonSuccBB) {
964a303c417SDimitry Andric BasicBlock *BB = SI.getParent();
965eb11fae6SDimitry Andric // We may have had multiple edges to this common successor block, so remove
966eb11fae6SDimitry Andric // them as predecessors. We skip the first one, either the default or the
967eb11fae6SDimitry Andric // actual first case.
968eb11fae6SDimitry Andric bool SkippedFirst = DefaultExitBB == nullptr;
969eb11fae6SDimitry Andric for (auto Case : SI.cases()) {
970eb11fae6SDimitry Andric assert(Case.getCaseSuccessor() == CommonSuccBB &&
971eb11fae6SDimitry Andric "Non-common successor!");
972eb11fae6SDimitry Andric (void)Case;
973eb11fae6SDimitry Andric if (!SkippedFirst) {
974eb11fae6SDimitry Andric SkippedFirst = true;
975eb11fae6SDimitry Andric continue;
976eb11fae6SDimitry Andric }
977eb11fae6SDimitry Andric CommonSuccBB->removePredecessor(BB,
978e6d15924SDimitry Andric /*KeepOneInputPHIs*/ true);
979eb11fae6SDimitry Andric }
980eb11fae6SDimitry Andric // Now nuke the switch and replace it with a direct branch.
981ac9a064cSDimitry Andric Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
982ac9a064cSDimitry Andric NewBI->setDebugLoc(SIW->getDebugLoc());
983e6d15924SDimitry Andric SIW.eraseFromParent();
984eb11fae6SDimitry Andric } else if (DefaultExitBB) {
985eb11fae6SDimitry Andric assert(SI.getNumCases() > 0 &&
986eb11fae6SDimitry Andric "If we had no cases we'd have a common successor!");
987eb11fae6SDimitry Andric // Move the last case to the default successor. This is valid as if the
988eb11fae6SDimitry Andric // default got unswitched it cannot be reached. This has the advantage of
989eb11fae6SDimitry Andric // being simple and keeping the number of edges from this switch to
990eb11fae6SDimitry Andric // successors the same, and avoiding any PHI update complexity.
991eb11fae6SDimitry Andric auto LastCaseI = std::prev(SI.case_end());
992e6d15924SDimitry Andric
993eb11fae6SDimitry Andric SI.setDefaultDest(LastCaseI->getCaseSuccessor());
994e6d15924SDimitry Andric SIW.setSuccessorWeight(
995e6d15924SDimitry Andric 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
996e6d15924SDimitry Andric SIW.removeCase(LastCaseI);
997a303c417SDimitry Andric }
998a303c417SDimitry Andric
999eb11fae6SDimitry Andric // Walk the unswitched exit blocks and the unswitched split blocks and update
1000eb11fae6SDimitry Andric // the dominator tree based on the CFG edits. While we are walking unordered
1001eb11fae6SDimitry Andric // containers here, the API for applyUpdates takes an unordered list of
1002eb11fae6SDimitry Andric // updates and requires them to not contain duplicates.
1003eb11fae6SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
1004eb11fae6SDimitry Andric for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
1005eb11fae6SDimitry Andric DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
1006eb11fae6SDimitry Andric DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
1007eb11fae6SDimitry Andric }
1008eb11fae6SDimitry Andric for (auto SplitUnswitchedPair : SplitExitBBMap) {
1009e6d15924SDimitry Andric DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
1010e6d15924SDimitry Andric DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
1011eb11fae6SDimitry Andric }
1012d8e91e46SDimitry Andric
1013d8e91e46SDimitry Andric if (MSSAU) {
1014b60736ecSDimitry Andric MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
1015d8e91e46SDimitry Andric if (VerifyMemorySSA)
1016d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
1017b60736ecSDimitry Andric } else {
1018b60736ecSDimitry Andric DT.applyUpdates(DTUpdates);
1019d8e91e46SDimitry Andric }
1020d8e91e46SDimitry Andric
1021eb11fae6SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1022eb11fae6SDimitry Andric
1023eb11fae6SDimitry Andric // We may have changed the nesting relationship for this loop so hoist it to
1024eb11fae6SDimitry Andric // its correct parent if needed.
1025706b4fc4SDimitry Andric hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
1026e6d15924SDimitry Andric
1027e6d15924SDimitry Andric if (MSSAU && VerifyMemorySSA)
1028e6d15924SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
1029eb11fae6SDimitry Andric
1030a303c417SDimitry Andric ++NumTrivial;
1031a303c417SDimitry Andric ++NumSwitches;
1032d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
1033a303c417SDimitry Andric return true;
1034a303c417SDimitry Andric }
1035a303c417SDimitry Andric
1036a303c417SDimitry Andric /// This routine scans the loop to find a branch or switch which occurs before
1037a303c417SDimitry Andric /// any side effects occur. These can potentially be unswitched without
1038a303c417SDimitry Andric /// duplicating the loop. If a branch or switch is successfully unswitched the
1039a303c417SDimitry Andric /// scanning continues to see if subsequent branches or switches have become
1040a303c417SDimitry Andric /// trivial. Once all trivial candidates have been unswitched, this routine
1041a303c417SDimitry Andric /// returns.
1042a303c417SDimitry Andric ///
1043a303c417SDimitry Andric /// The return value indicates whether anything was unswitched (and therefore
1044a303c417SDimitry Andric /// changed).
1045eb11fae6SDimitry Andric ///
1046eb11fae6SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs
1047eb11fae6SDimitry Andric /// invalidated by this.
unswitchAllTrivialConditions(Loop & L,DominatorTree & DT,LoopInfo & LI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU)1048a303c417SDimitry Andric static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
1049d8e91e46SDimitry Andric LoopInfo &LI, ScalarEvolution *SE,
1050d8e91e46SDimitry Andric MemorySSAUpdater *MSSAU) {
1051a303c417SDimitry Andric bool Changed = false;
1052a303c417SDimitry Andric
1053a303c417SDimitry Andric // If loop header has only one reachable successor we should keep looking for
1054a303c417SDimitry Andric // trivial condition candidates in the successor as well. An alternative is
1055a303c417SDimitry Andric // to constant fold conditions and merge successors into loop header (then we
1056a303c417SDimitry Andric // only need to check header's terminator). The reason for not doing this in
1057a303c417SDimitry Andric // LoopUnswitch pass is that it could potentially break LoopPassManager's
1058a303c417SDimitry Andric // invariants. Folding dead branches could either eliminate the current loop
1059a303c417SDimitry Andric // or make other loops unreachable. LCSSA form might also not be preserved
1060a303c417SDimitry Andric // after deleting branches. The following code keeps traversing loop header's
1061a303c417SDimitry Andric // successors until it finds the trivial condition candidate (condition that
1062a303c417SDimitry Andric // is not a constant). Since unswitching generates branches with constant
1063a303c417SDimitry Andric // conditions, this scenario could be very common in practice.
1064a303c417SDimitry Andric BasicBlock *CurrentBB = L.getHeader();
1065a303c417SDimitry Andric SmallPtrSet<BasicBlock *, 8> Visited;
1066a303c417SDimitry Andric Visited.insert(CurrentBB);
1067a303c417SDimitry Andric do {
1068a303c417SDimitry Andric // Check if there are any side-effecting instructions (e.g. stores, calls,
1069a303c417SDimitry Andric // volatile loads) in the part of the loop that the code *would* execute
1070a303c417SDimitry Andric // without unswitching.
1071e6d15924SDimitry Andric if (MSSAU) // Possible early exit with MSSA
1072e6d15924SDimitry Andric if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
1073e6d15924SDimitry Andric if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
1074e6d15924SDimitry Andric return Changed;
1075a303c417SDimitry Andric if (llvm::any_of(*CurrentBB,
1076a303c417SDimitry Andric [](Instruction &I) { return I.mayHaveSideEffects(); }))
1077a303c417SDimitry Andric return Changed;
1078a303c417SDimitry Andric
1079d8e91e46SDimitry Andric Instruction *CurrentTerm = CurrentBB->getTerminator();
1080a303c417SDimitry Andric
1081a303c417SDimitry Andric if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
1082a303c417SDimitry Andric // Don't bother trying to unswitch past a switch with a constant
1083a303c417SDimitry Andric // condition. This should be removed prior to running this pass by
1084344a3780SDimitry Andric // simplifycfg.
1085a303c417SDimitry Andric if (isa<Constant>(SI->getCondition()))
1086a303c417SDimitry Andric return Changed;
1087a303c417SDimitry Andric
1088d8e91e46SDimitry Andric if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
1089eb11fae6SDimitry Andric // Couldn't unswitch this one so we're done.
1090a303c417SDimitry Andric return Changed;
1091a303c417SDimitry Andric
1092a303c417SDimitry Andric // Mark that we managed to unswitch something.
1093a303c417SDimitry Andric Changed = true;
1094a303c417SDimitry Andric
1095a303c417SDimitry Andric // If unswitching turned the terminator into an unconditional branch then
1096a303c417SDimitry Andric // we can continue. The unswitching logic specifically works to fold any
1097a303c417SDimitry Andric // cases it can into an unconditional branch to make it easier to
1098a303c417SDimitry Andric // recognize here.
1099a303c417SDimitry Andric auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1100a303c417SDimitry Andric if (!BI || BI->isConditional())
1101a303c417SDimitry Andric return Changed;
1102a303c417SDimitry Andric
1103a303c417SDimitry Andric CurrentBB = BI->getSuccessor(0);
1104a303c417SDimitry Andric continue;
1105a303c417SDimitry Andric }
1106a303c417SDimitry Andric
1107a303c417SDimitry Andric auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1108a303c417SDimitry Andric if (!BI)
1109a303c417SDimitry Andric // We do not understand other terminator instructions.
1110a303c417SDimitry Andric return Changed;
1111a303c417SDimitry Andric
1112a303c417SDimitry Andric // Don't bother trying to unswitch past an unconditional branch or a branch
1113344a3780SDimitry Andric // with a constant value. These should be removed by simplifycfg prior to
1114a303c417SDimitry Andric // running this pass.
1115145449b1SDimitry Andric if (!BI->isConditional() ||
1116145449b1SDimitry Andric isa<Constant>(skipTrivialSelect(BI->getCondition())))
1117a303c417SDimitry Andric return Changed;
1118a303c417SDimitry Andric
1119a303c417SDimitry Andric // Found a trivial condition candidate: non-foldable conditional branch. If
1120a303c417SDimitry Andric // we fail to unswitch this, we can't do anything else that is trivial.
1121d8e91e46SDimitry Andric if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1122a303c417SDimitry Andric return Changed;
1123a303c417SDimitry Andric
1124a303c417SDimitry Andric // Mark that we managed to unswitch something.
1125a303c417SDimitry Andric Changed = true;
1126a303c417SDimitry Andric
1127eb11fae6SDimitry Andric // If we only unswitched some of the conditions feeding the branch, we won't
1128eb11fae6SDimitry Andric // have collapsed it to a single successor.
1129a303c417SDimitry Andric BI = cast<BranchInst>(CurrentBB->getTerminator());
1130eb11fae6SDimitry Andric if (BI->isConditional())
1131eb11fae6SDimitry Andric return Changed;
1132eb11fae6SDimitry Andric
1133eb11fae6SDimitry Andric // Follow the newly unconditional branch into its successor.
1134a303c417SDimitry Andric CurrentBB = BI->getSuccessor(0);
1135a303c417SDimitry Andric
1136a303c417SDimitry Andric // When continuing, if we exit the loop or reach a previous visited block,
1137a303c417SDimitry Andric // then we can not reach any trivial condition candidates (unfoldable
1138a303c417SDimitry Andric // branch instructions or switch instructions) and no unswitch can happen.
1139a303c417SDimitry Andric } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1140a303c417SDimitry Andric
1141a303c417SDimitry Andric return Changed;
1142a303c417SDimitry Andric }
1143a303c417SDimitry Andric
1144044eb2f6SDimitry Andric /// Build the cloned blocks for an unswitched copy of the given loop.
1145044eb2f6SDimitry Andric ///
1146044eb2f6SDimitry Andric /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1147044eb2f6SDimitry Andric /// after the split block (`SplitBB`) that will be used to select between the
1148044eb2f6SDimitry Andric /// cloned and original loop.
1149044eb2f6SDimitry Andric ///
1150044eb2f6SDimitry Andric /// This routine handles cloning all of the necessary loop blocks and exit
1151044eb2f6SDimitry Andric /// blocks including rewriting their instructions and the relevant PHI nodes.
1152eb11fae6SDimitry Andric /// Any loop blocks or exit blocks which are dominated by a different successor
1153eb11fae6SDimitry Andric /// than the one for this clone of the loop blocks can be trivially skipped. We
1154eb11fae6SDimitry Andric /// use the `DominatingSucc` map to determine whether a block satisfies that
1155eb11fae6SDimitry Andric /// property with a simple map lookup.
1156eb11fae6SDimitry Andric ///
1157eb11fae6SDimitry Andric /// It also correctly creates the unconditional branch in the cloned
1158044eb2f6SDimitry Andric /// unswitched parent block to only point at the unswitched successor.
1159044eb2f6SDimitry Andric ///
1160044eb2f6SDimitry Andric /// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1161044eb2f6SDimitry Andric /// block splitting is correctly reflected in `LoopInfo`, essentially all of
1162044eb2f6SDimitry Andric /// the cloned blocks (and their loops) are left without full `LoopInfo`
1163044eb2f6SDimitry Andric /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1164044eb2f6SDimitry Andric /// blocks to them but doesn't create the cloned `DominatorTree` structure and
1165044eb2f6SDimitry Andric /// instead the caller must recompute an accurate DT. It *does* correctly
1166044eb2f6SDimitry Andric /// update the `AssumptionCache` provided in `AC`.
buildClonedLoopBlocks(Loop & L,BasicBlock * LoopPH,BasicBlock * SplitBB,ArrayRef<BasicBlock * > ExitBlocks,BasicBlock * ParentBB,BasicBlock * UnswitchedSuccBB,BasicBlock * ContinueSuccBB,const SmallDenseMap<BasicBlock *,BasicBlock *,16> & DominatingSucc,ValueToValueMapTy & VMap,SmallVectorImpl<DominatorTree::UpdateType> & DTUpdates,AssumptionCache & AC,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU,ScalarEvolution * SE)1167044eb2f6SDimitry Andric static BasicBlock *buildClonedLoopBlocks(
1168044eb2f6SDimitry Andric Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1169044eb2f6SDimitry Andric ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1170044eb2f6SDimitry Andric BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1171eb11fae6SDimitry Andric const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1172eb11fae6SDimitry Andric ValueToValueMapTy &VMap,
1173eb11fae6SDimitry Andric SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1174e3b55780SDimitry Andric DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
1175e3b55780SDimitry Andric ScalarEvolution *SE) {
1176044eb2f6SDimitry Andric SmallVector<BasicBlock *, 4> NewBlocks;
1177044eb2f6SDimitry Andric NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1178044eb2f6SDimitry Andric
1179044eb2f6SDimitry Andric // We will need to clone a bunch of blocks, wrap up the clone operation in
1180044eb2f6SDimitry Andric // a helper.
1181044eb2f6SDimitry Andric auto CloneBlock = [&](BasicBlock *OldBB) {
1182044eb2f6SDimitry Andric // Clone the basic block and insert it before the new preheader.
1183044eb2f6SDimitry Andric BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1184044eb2f6SDimitry Andric NewBB->moveBefore(LoopPH);
1185044eb2f6SDimitry Andric
1186044eb2f6SDimitry Andric // Record this block and the mapping.
1187044eb2f6SDimitry Andric NewBlocks.push_back(NewBB);
1188044eb2f6SDimitry Andric VMap[OldBB] = NewBB;
1189044eb2f6SDimitry Andric
1190044eb2f6SDimitry Andric return NewBB;
1191044eb2f6SDimitry Andric };
1192044eb2f6SDimitry Andric
1193eb11fae6SDimitry Andric // We skip cloning blocks when they have a dominating succ that is not the
1194eb11fae6SDimitry Andric // succ we are cloning for.
1195eb11fae6SDimitry Andric auto SkipBlock = [&](BasicBlock *BB) {
1196eb11fae6SDimitry Andric auto It = DominatingSucc.find(BB);
1197eb11fae6SDimitry Andric return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1198eb11fae6SDimitry Andric };
1199eb11fae6SDimitry Andric
1200044eb2f6SDimitry Andric // First, clone the preheader.
1201044eb2f6SDimitry Andric auto *ClonedPH = CloneBlock(LoopPH);
1202044eb2f6SDimitry Andric
1203044eb2f6SDimitry Andric // Then clone all the loop blocks, skipping the ones that aren't necessary.
1204044eb2f6SDimitry Andric for (auto *LoopBB : L.blocks())
1205eb11fae6SDimitry Andric if (!SkipBlock(LoopBB))
1206044eb2f6SDimitry Andric CloneBlock(LoopBB);
1207044eb2f6SDimitry Andric
1208044eb2f6SDimitry Andric // Split all the loop exit edges so that when we clone the exit blocks, if
1209044eb2f6SDimitry Andric // any of the exit blocks are *also* a preheader for some other loop, we
1210044eb2f6SDimitry Andric // don't create multiple predecessors entering the loop header.
1211044eb2f6SDimitry Andric for (auto *ExitBB : ExitBlocks) {
1212eb11fae6SDimitry Andric if (SkipBlock(ExitBB))
1213044eb2f6SDimitry Andric continue;
1214044eb2f6SDimitry Andric
1215044eb2f6SDimitry Andric // When we are going to clone an exit, we don't need to clone all the
1216044eb2f6SDimitry Andric // instructions in the exit block and we want to ensure we have an easy
1217044eb2f6SDimitry Andric // place to merge the CFG, so split the exit first. This is always safe to
1218044eb2f6SDimitry Andric // do because there cannot be any non-loop predecessors of a loop exit in
1219044eb2f6SDimitry Andric // loop simplified form.
1220b1c73532SDimitry Andric auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU);
1221044eb2f6SDimitry Andric
1222044eb2f6SDimitry Andric // Rearrange the names to make it easier to write test cases by having the
1223044eb2f6SDimitry Andric // exit block carry the suffix rather than the merge block carrying the
1224044eb2f6SDimitry Andric // suffix.
1225044eb2f6SDimitry Andric MergeBB->takeName(ExitBB);
1226044eb2f6SDimitry Andric ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1227044eb2f6SDimitry Andric
1228044eb2f6SDimitry Andric // Now clone the original exit block.
1229044eb2f6SDimitry Andric auto *ClonedExitBB = CloneBlock(ExitBB);
1230044eb2f6SDimitry Andric assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1231044eb2f6SDimitry Andric "Exit block should have been split to have one successor!");
1232044eb2f6SDimitry Andric assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1233044eb2f6SDimitry Andric "Cloned exit block has the wrong successor!");
1234044eb2f6SDimitry Andric
1235044eb2f6SDimitry Andric // Remap any cloned instructions and create a merge phi node for them.
1236044eb2f6SDimitry Andric for (auto ZippedInsts : llvm::zip_first(
1237044eb2f6SDimitry Andric llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1238044eb2f6SDimitry Andric llvm::make_range(ClonedExitBB->begin(),
1239044eb2f6SDimitry Andric std::prev(ClonedExitBB->end())))) {
1240044eb2f6SDimitry Andric Instruction &I = std::get<0>(ZippedInsts);
1241044eb2f6SDimitry Andric Instruction &ClonedI = std::get<1>(ZippedInsts);
1242044eb2f6SDimitry Andric
1243044eb2f6SDimitry Andric // The only instructions in the exit block should be PHI nodes and
1244044eb2f6SDimitry Andric // potentially a landing pad.
1245044eb2f6SDimitry Andric assert(
1246044eb2f6SDimitry Andric (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1247044eb2f6SDimitry Andric "Bad instruction in exit block!");
1248044eb2f6SDimitry Andric // We should have a value map between the instruction and its clone.
1249044eb2f6SDimitry Andric assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1250044eb2f6SDimitry Andric
1251e3b55780SDimitry Andric // Forget SCEVs based on exit phis in case SCEV looked through the phi.
1252a51c056eSDimitry Andric if (SE)
1253a51c056eSDimitry Andric if (auto *PN = dyn_cast<PHINode>(&I))
1254a51c056eSDimitry Andric SE->forgetLcssaPhiWithNewPredecessor(&L, PN);
1255e3b55780SDimitry Andric
1256ac9a064cSDimitry Andric BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
1257ac9a064cSDimitry Andric
1258044eb2f6SDimitry Andric auto *MergePN =
1259b1c73532SDimitry Andric PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
1260ac9a064cSDimitry Andric MergePN->insertBefore(InsertPt);
1261ac9a064cSDimitry Andric MergePN->setDebugLoc(InsertPt->getDebugLoc());
1262044eb2f6SDimitry Andric I.replaceAllUsesWith(MergePN);
1263044eb2f6SDimitry Andric MergePN->addIncoming(&I, ExitBB);
1264044eb2f6SDimitry Andric MergePN->addIncoming(&ClonedI, ClonedExitBB);
1265044eb2f6SDimitry Andric }
1266044eb2f6SDimitry Andric }
1267044eb2f6SDimitry Andric
1268044eb2f6SDimitry Andric // Rewrite the instructions in the cloned blocks to refer to the instructions
1269044eb2f6SDimitry Andric // in the cloned blocks. We have to do this as a second pass so that we have
1270044eb2f6SDimitry Andric // everything available. Also, we have inserted new instructions which may
1271044eb2f6SDimitry Andric // include assume intrinsics, so we update the assumption cache while
1272044eb2f6SDimitry Andric // processing this.
1273b1c73532SDimitry Andric Module *M = ClonedPH->getParent()->getParent();
1274044eb2f6SDimitry Andric for (auto *ClonedBB : NewBlocks)
1275044eb2f6SDimitry Andric for (Instruction &I : *ClonedBB) {
1276ac9a064cSDimitry Andric RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
1277b1c73532SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1278044eb2f6SDimitry Andric RemapInstruction(&I, VMap,
1279044eb2f6SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1280344a3780SDimitry Andric if (auto *II = dyn_cast<AssumeInst>(&I))
1281044eb2f6SDimitry Andric AC.registerAssumption(II);
1282044eb2f6SDimitry Andric }
1283044eb2f6SDimitry Andric
1284eb11fae6SDimitry Andric // Update any PHI nodes in the cloned successors of the skipped blocks to not
1285eb11fae6SDimitry Andric // have spurious incoming values.
1286eb11fae6SDimitry Andric for (auto *LoopBB : L.blocks())
1287eb11fae6SDimitry Andric if (SkipBlock(LoopBB))
1288eb11fae6SDimitry Andric for (auto *SuccBB : successors(LoopBB))
1289eb11fae6SDimitry Andric if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1290eb11fae6SDimitry Andric for (PHINode &PN : ClonedSuccBB->phis())
1291eb11fae6SDimitry Andric PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1292eb11fae6SDimitry Andric
1293eb11fae6SDimitry Andric // Remove the cloned parent as a predecessor of any successor we ended up
1294eb11fae6SDimitry Andric // cloning other than the unswitched one.
1295044eb2f6SDimitry Andric auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1296eb11fae6SDimitry Andric for (auto *SuccBB : successors(ParentBB)) {
1297eb11fae6SDimitry Andric if (SuccBB == UnswitchedSuccBB)
1298eb11fae6SDimitry Andric continue;
1299eb11fae6SDimitry Andric
1300eb11fae6SDimitry Andric auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1301eb11fae6SDimitry Andric if (!ClonedSuccBB)
1302eb11fae6SDimitry Andric continue;
1303eb11fae6SDimitry Andric
1304eb11fae6SDimitry Andric ClonedSuccBB->removePredecessor(ClonedParentBB,
1305e6d15924SDimitry Andric /*KeepOneInputPHIs*/ true);
1306eb11fae6SDimitry Andric }
1307eb11fae6SDimitry Andric
1308eb11fae6SDimitry Andric // Replace the cloned branch with an unconditional branch to the cloned
1309044eb2f6SDimitry Andric // unswitched successor.
1310044eb2f6SDimitry Andric auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1311b60736ecSDimitry Andric Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1312b60736ecSDimitry Andric // Trivial Simplification. If Terminator is a conditional branch and
1313b60736ecSDimitry Andric // condition becomes dead - erase it.
1314b60736ecSDimitry Andric Value *ClonedConditionToErase = nullptr;
1315b60736ecSDimitry Andric if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1316b60736ecSDimitry Andric ClonedConditionToErase = BI->getCondition();
1317b60736ecSDimitry Andric else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1318b60736ecSDimitry Andric ClonedConditionToErase = SI->getCondition();
1319b60736ecSDimitry Andric
1320ac9a064cSDimitry Andric Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1321ac9a064cSDimitry Andric BI->setDebugLoc(ClonedTerminator->getDebugLoc());
1322b60736ecSDimitry Andric ClonedTerminator->eraseFromParent();
1323044eb2f6SDimitry Andric
1324b60736ecSDimitry Andric if (ClonedConditionToErase)
1325b60736ecSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1326b60736ecSDimitry Andric MSSAU);
1327b60736ecSDimitry Andric
1328eb11fae6SDimitry Andric // If there are duplicate entries in the PHI nodes because of multiple edges
1329eb11fae6SDimitry Andric // to the unswitched successor, we need to nuke all but one as we replaced it
1330eb11fae6SDimitry Andric // with a direct branch.
1331eb11fae6SDimitry Andric for (PHINode &PN : ClonedSuccBB->phis()) {
1332eb11fae6SDimitry Andric bool Found = false;
1333eb11fae6SDimitry Andric // Loop over the incoming operands backwards so we can easily delete as we
1334eb11fae6SDimitry Andric // go without invalidating the index.
1335eb11fae6SDimitry Andric for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1336eb11fae6SDimitry Andric if (PN.getIncomingBlock(i) != ClonedParentBB)
1337eb11fae6SDimitry Andric continue;
1338eb11fae6SDimitry Andric if (!Found) {
1339eb11fae6SDimitry Andric Found = true;
1340eb11fae6SDimitry Andric continue;
1341eb11fae6SDimitry Andric }
1342eb11fae6SDimitry Andric PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1343eb11fae6SDimitry Andric }
1344eb11fae6SDimitry Andric }
1345eb11fae6SDimitry Andric
1346eb11fae6SDimitry Andric // Record the domtree updates for the new blocks.
1347eb11fae6SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet;
1348eb11fae6SDimitry Andric for (auto *ClonedBB : NewBlocks) {
1349eb11fae6SDimitry Andric for (auto *SuccBB : successors(ClonedBB))
1350eb11fae6SDimitry Andric if (SuccSet.insert(SuccBB).second)
1351eb11fae6SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1352eb11fae6SDimitry Andric SuccSet.clear();
1353eb11fae6SDimitry Andric }
1354044eb2f6SDimitry Andric
1355044eb2f6SDimitry Andric return ClonedPH;
1356044eb2f6SDimitry Andric }
1357044eb2f6SDimitry Andric
1358044eb2f6SDimitry Andric /// Recursively clone the specified loop and all of its children.
1359044eb2f6SDimitry Andric ///
1360044eb2f6SDimitry Andric /// The target parent loop for the clone should be provided, or can be null if
1361044eb2f6SDimitry Andric /// the clone is a top-level loop. While cloning, all the blocks are mapped
1362044eb2f6SDimitry Andric /// with the provided value map. The entire original loop must be present in
1363044eb2f6SDimitry Andric /// the value map. The cloned loop is returned.
cloneLoopNest(Loop & OrigRootL,Loop * RootParentL,const ValueToValueMapTy & VMap,LoopInfo & LI)1364044eb2f6SDimitry Andric static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1365044eb2f6SDimitry Andric const ValueToValueMapTy &VMap, LoopInfo &LI) {
1366044eb2f6SDimitry Andric auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1367044eb2f6SDimitry Andric assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1368044eb2f6SDimitry Andric ClonedL.reserveBlocks(OrigL.getNumBlocks());
1369044eb2f6SDimitry Andric for (auto *BB : OrigL.blocks()) {
1370044eb2f6SDimitry Andric auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1371044eb2f6SDimitry Andric ClonedL.addBlockEntry(ClonedBB);
1372eb11fae6SDimitry Andric if (LI.getLoopFor(BB) == &OrigL)
1373044eb2f6SDimitry Andric LI.changeLoopFor(ClonedBB, &ClonedL);
1374044eb2f6SDimitry Andric }
1375044eb2f6SDimitry Andric };
1376044eb2f6SDimitry Andric
1377044eb2f6SDimitry Andric // We specially handle the first loop because it may get cloned into
1378044eb2f6SDimitry Andric // a different parent and because we most commonly are cloning leaf loops.
1379044eb2f6SDimitry Andric Loop *ClonedRootL = LI.AllocateLoop();
1380044eb2f6SDimitry Andric if (RootParentL)
1381044eb2f6SDimitry Andric RootParentL->addChildLoop(ClonedRootL);
1382044eb2f6SDimitry Andric else
1383044eb2f6SDimitry Andric LI.addTopLevelLoop(ClonedRootL);
1384044eb2f6SDimitry Andric AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1385044eb2f6SDimitry Andric
1386b60736ecSDimitry Andric if (OrigRootL.isInnermost())
1387044eb2f6SDimitry Andric return ClonedRootL;
1388044eb2f6SDimitry Andric
1389044eb2f6SDimitry Andric // If we have a nest, we can quickly clone the entire loop nest using an
1390044eb2f6SDimitry Andric // iterative approach because it is a tree. We keep the cloned parent in the
1391044eb2f6SDimitry Andric // data structure to avoid repeatedly querying through a map to find it.
1392044eb2f6SDimitry Andric SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1393044eb2f6SDimitry Andric // Build up the loops to clone in reverse order as we'll clone them from the
1394044eb2f6SDimitry Andric // back.
1395044eb2f6SDimitry Andric for (Loop *ChildL : llvm::reverse(OrigRootL))
1396044eb2f6SDimitry Andric LoopsToClone.push_back({ClonedRootL, ChildL});
1397044eb2f6SDimitry Andric do {
1398044eb2f6SDimitry Andric Loop *ClonedParentL, *L;
1399044eb2f6SDimitry Andric std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1400044eb2f6SDimitry Andric Loop *ClonedL = LI.AllocateLoop();
1401044eb2f6SDimitry Andric ClonedParentL->addChildLoop(ClonedL);
1402044eb2f6SDimitry Andric AddClonedBlocksToLoop(*L, *ClonedL);
1403044eb2f6SDimitry Andric for (Loop *ChildL : llvm::reverse(*L))
1404044eb2f6SDimitry Andric LoopsToClone.push_back({ClonedL, ChildL});
1405044eb2f6SDimitry Andric } while (!LoopsToClone.empty());
1406044eb2f6SDimitry Andric
1407044eb2f6SDimitry Andric return ClonedRootL;
1408044eb2f6SDimitry Andric }
1409044eb2f6SDimitry Andric
1410044eb2f6SDimitry Andric /// Build the cloned loops of an original loop from unswitching.
1411044eb2f6SDimitry Andric ///
1412044eb2f6SDimitry Andric /// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1413044eb2f6SDimitry Andric /// operation. We need to re-verify that there even is a loop (as the backedge
1414044eb2f6SDimitry Andric /// may not have been cloned), and even if there are remaining backedges the
1415044eb2f6SDimitry Andric /// backedge set may be different. However, we know that each child loop is
1416044eb2f6SDimitry Andric /// undisturbed, we only need to find where to place each child loop within
1417044eb2f6SDimitry Andric /// either any parent loop or within a cloned version of the original loop.
1418044eb2f6SDimitry Andric ///
1419044eb2f6SDimitry Andric /// Because child loops may end up cloned outside of any cloned version of the
1420044eb2f6SDimitry Andric /// original loop, multiple cloned sibling loops may be created. All of them
1421044eb2f6SDimitry Andric /// are returned so that the newly introduced loop nest roots can be
1422044eb2f6SDimitry Andric /// identified.
buildClonedLoops(Loop & OrigL,ArrayRef<BasicBlock * > ExitBlocks,const ValueToValueMapTy & VMap,LoopInfo & LI,SmallVectorImpl<Loop * > & NonChildClonedLoops)1423eb11fae6SDimitry Andric static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1424044eb2f6SDimitry Andric const ValueToValueMapTy &VMap, LoopInfo &LI,
1425044eb2f6SDimitry Andric SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1426044eb2f6SDimitry Andric Loop *ClonedL = nullptr;
1427044eb2f6SDimitry Andric
1428044eb2f6SDimitry Andric auto *OrigPH = OrigL.getLoopPreheader();
1429044eb2f6SDimitry Andric auto *OrigHeader = OrigL.getHeader();
1430044eb2f6SDimitry Andric
1431044eb2f6SDimitry Andric auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1432044eb2f6SDimitry Andric auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1433044eb2f6SDimitry Andric
1434044eb2f6SDimitry Andric // We need to know the loops of the cloned exit blocks to even compute the
1435044eb2f6SDimitry Andric // accurate parent loop. If we only clone exits to some parent of the
1436044eb2f6SDimitry Andric // original parent, we want to clone into that outer loop. We also keep track
1437044eb2f6SDimitry Andric // of the loops that our cloned exit blocks participate in.
1438044eb2f6SDimitry Andric Loop *ParentL = nullptr;
1439044eb2f6SDimitry Andric SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1440044eb2f6SDimitry Andric SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1441044eb2f6SDimitry Andric ClonedExitsInLoops.reserve(ExitBlocks.size());
1442044eb2f6SDimitry Andric for (auto *ExitBB : ExitBlocks)
1443044eb2f6SDimitry Andric if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1444044eb2f6SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1445044eb2f6SDimitry Andric ExitLoopMap[ClonedExitBB] = ExitL;
1446044eb2f6SDimitry Andric ClonedExitsInLoops.push_back(ClonedExitBB);
1447044eb2f6SDimitry Andric if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1448044eb2f6SDimitry Andric ParentL = ExitL;
1449044eb2f6SDimitry Andric }
1450044eb2f6SDimitry Andric assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1451044eb2f6SDimitry Andric ParentL->contains(OrigL.getParentLoop())) &&
1452044eb2f6SDimitry Andric "The computed parent loop should always contain (or be) the parent of "
1453044eb2f6SDimitry Andric "the original loop.");
1454044eb2f6SDimitry Andric
1455044eb2f6SDimitry Andric // We build the set of blocks dominated by the cloned header from the set of
1456044eb2f6SDimitry Andric // cloned blocks out of the original loop. While not all of these will
1457044eb2f6SDimitry Andric // necessarily be in the cloned loop, it is enough to establish that they
1458044eb2f6SDimitry Andric // aren't in unreachable cycles, etc.
1459044eb2f6SDimitry Andric SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1460044eb2f6SDimitry Andric for (auto *BB : OrigL.blocks())
1461044eb2f6SDimitry Andric if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1462044eb2f6SDimitry Andric ClonedLoopBlocks.insert(ClonedBB);
1463044eb2f6SDimitry Andric
1464044eb2f6SDimitry Andric // Rebuild the set of blocks that will end up in the cloned loop. We may have
1465044eb2f6SDimitry Andric // skipped cloning some region of this loop which can in turn skip some of
1466044eb2f6SDimitry Andric // the backedges so we have to rebuild the blocks in the loop based on the
1467044eb2f6SDimitry Andric // backedges that remain after cloning.
1468044eb2f6SDimitry Andric SmallVector<BasicBlock *, 16> Worklist;
1469044eb2f6SDimitry Andric SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1470044eb2f6SDimitry Andric for (auto *Pred : predecessors(ClonedHeader)) {
1471044eb2f6SDimitry Andric // The only possible non-loop header predecessor is the preheader because
1472044eb2f6SDimitry Andric // we know we cloned the loop in simplified form.
1473044eb2f6SDimitry Andric if (Pred == ClonedPH)
1474044eb2f6SDimitry Andric continue;
1475044eb2f6SDimitry Andric
1476044eb2f6SDimitry Andric // Because the loop was in simplified form, the only non-loop predecessor
1477044eb2f6SDimitry Andric // should be the preheader.
1478044eb2f6SDimitry Andric assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1479044eb2f6SDimitry Andric "header other than the preheader "
1480044eb2f6SDimitry Andric "that is not part of the loop!");
1481044eb2f6SDimitry Andric
1482044eb2f6SDimitry Andric // Insert this block into the loop set and on the first visit (and if it
1483044eb2f6SDimitry Andric // isn't the header we're currently walking) put it into the worklist to
1484044eb2f6SDimitry Andric // recurse through.
1485044eb2f6SDimitry Andric if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1486044eb2f6SDimitry Andric Worklist.push_back(Pred);
1487044eb2f6SDimitry Andric }
1488044eb2f6SDimitry Andric
1489044eb2f6SDimitry Andric // If we had any backedges then there *is* a cloned loop. Put the header into
1490044eb2f6SDimitry Andric // the loop set and then walk the worklist backwards to find all the blocks
1491044eb2f6SDimitry Andric // that remain within the loop after cloning.
1492044eb2f6SDimitry Andric if (!BlocksInClonedLoop.empty()) {
1493044eb2f6SDimitry Andric BlocksInClonedLoop.insert(ClonedHeader);
1494044eb2f6SDimitry Andric
1495044eb2f6SDimitry Andric while (!Worklist.empty()) {
1496044eb2f6SDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
1497044eb2f6SDimitry Andric assert(BlocksInClonedLoop.count(BB) &&
1498044eb2f6SDimitry Andric "Didn't put block into the loop set!");
1499044eb2f6SDimitry Andric
1500044eb2f6SDimitry Andric // Insert any predecessors that are in the possible set into the cloned
1501044eb2f6SDimitry Andric // set, and if the insert is successful, add them to the worklist. Note
1502044eb2f6SDimitry Andric // that we filter on the blocks that are definitely reachable via the
1503044eb2f6SDimitry Andric // backedge to the loop header so we may prune out dead code within the
1504044eb2f6SDimitry Andric // cloned loop.
1505044eb2f6SDimitry Andric for (auto *Pred : predecessors(BB))
1506044eb2f6SDimitry Andric if (ClonedLoopBlocks.count(Pred) &&
1507044eb2f6SDimitry Andric BlocksInClonedLoop.insert(Pred).second)
1508044eb2f6SDimitry Andric Worklist.push_back(Pred);
1509044eb2f6SDimitry Andric }
1510044eb2f6SDimitry Andric
1511044eb2f6SDimitry Andric ClonedL = LI.AllocateLoop();
1512044eb2f6SDimitry Andric if (ParentL) {
1513044eb2f6SDimitry Andric ParentL->addBasicBlockToLoop(ClonedPH, LI);
1514044eb2f6SDimitry Andric ParentL->addChildLoop(ClonedL);
1515044eb2f6SDimitry Andric } else {
1516044eb2f6SDimitry Andric LI.addTopLevelLoop(ClonedL);
1517044eb2f6SDimitry Andric }
1518eb11fae6SDimitry Andric NonChildClonedLoops.push_back(ClonedL);
1519044eb2f6SDimitry Andric
1520044eb2f6SDimitry Andric ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1521044eb2f6SDimitry Andric // We don't want to just add the cloned loop blocks based on how we
1522044eb2f6SDimitry Andric // discovered them. The original order of blocks was carefully built in
1523044eb2f6SDimitry Andric // a way that doesn't rely on predecessor ordering. Rather than re-invent
1524044eb2f6SDimitry Andric // that logic, we just re-walk the original blocks (and those of the child
1525044eb2f6SDimitry Andric // loops) and filter them as we add them into the cloned loop.
1526044eb2f6SDimitry Andric for (auto *BB : OrigL.blocks()) {
1527044eb2f6SDimitry Andric auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1528044eb2f6SDimitry Andric if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1529044eb2f6SDimitry Andric continue;
1530044eb2f6SDimitry Andric
1531044eb2f6SDimitry Andric // Directly add the blocks that are only in this loop.
1532044eb2f6SDimitry Andric if (LI.getLoopFor(BB) == &OrigL) {
1533044eb2f6SDimitry Andric ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1534044eb2f6SDimitry Andric continue;
1535044eb2f6SDimitry Andric }
1536044eb2f6SDimitry Andric
1537044eb2f6SDimitry Andric // We want to manually add it to this loop and parents.
1538044eb2f6SDimitry Andric // Registering it with LoopInfo will happen when we clone the top
1539044eb2f6SDimitry Andric // loop for this block.
1540044eb2f6SDimitry Andric for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1541044eb2f6SDimitry Andric PL->addBlockEntry(ClonedBB);
1542044eb2f6SDimitry Andric }
1543044eb2f6SDimitry Andric
1544044eb2f6SDimitry Andric // Now add each child loop whose header remains within the cloned loop. All
1545044eb2f6SDimitry Andric // of the blocks within the loop must satisfy the same constraints as the
1546044eb2f6SDimitry Andric // header so once we pass the header checks we can just clone the entire
1547044eb2f6SDimitry Andric // child loop nest.
1548044eb2f6SDimitry Andric for (Loop *ChildL : OrigL) {
1549044eb2f6SDimitry Andric auto *ClonedChildHeader =
1550044eb2f6SDimitry Andric cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1551044eb2f6SDimitry Andric if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1552044eb2f6SDimitry Andric continue;
1553044eb2f6SDimitry Andric
1554044eb2f6SDimitry Andric #ifndef NDEBUG
1555044eb2f6SDimitry Andric // We should never have a cloned child loop header but fail to have
1556044eb2f6SDimitry Andric // all of the blocks for that child loop.
1557044eb2f6SDimitry Andric for (auto *ChildLoopBB : ChildL->blocks())
1558044eb2f6SDimitry Andric assert(BlocksInClonedLoop.count(
1559044eb2f6SDimitry Andric cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1560044eb2f6SDimitry Andric "Child cloned loop has a header within the cloned outer "
1561044eb2f6SDimitry Andric "loop but not all of its blocks!");
1562044eb2f6SDimitry Andric #endif
1563044eb2f6SDimitry Andric
1564044eb2f6SDimitry Andric cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1565044eb2f6SDimitry Andric }
1566044eb2f6SDimitry Andric }
1567044eb2f6SDimitry Andric
1568044eb2f6SDimitry Andric // Now that we've handled all the components of the original loop that were
1569044eb2f6SDimitry Andric // cloned into a new loop, we still need to handle anything from the original
1570044eb2f6SDimitry Andric // loop that wasn't in a cloned loop.
1571044eb2f6SDimitry Andric
1572044eb2f6SDimitry Andric // Figure out what blocks are left to place within any loop nest containing
1573044eb2f6SDimitry Andric // the unswitched loop. If we never formed a loop, the cloned PH is one of
1574044eb2f6SDimitry Andric // them.
1575044eb2f6SDimitry Andric SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1576044eb2f6SDimitry Andric if (BlocksInClonedLoop.empty())
1577044eb2f6SDimitry Andric UnloopedBlockSet.insert(ClonedPH);
1578044eb2f6SDimitry Andric for (auto *ClonedBB : ClonedLoopBlocks)
1579044eb2f6SDimitry Andric if (!BlocksInClonedLoop.count(ClonedBB))
1580044eb2f6SDimitry Andric UnloopedBlockSet.insert(ClonedBB);
1581044eb2f6SDimitry Andric
1582044eb2f6SDimitry Andric // Copy the cloned exits and sort them in ascending loop depth, we'll work
1583044eb2f6SDimitry Andric // backwards across these to process them inside out. The order shouldn't
1584044eb2f6SDimitry Andric // matter as we're just trying to build up the map from inside-out; we use
1585044eb2f6SDimitry Andric // the map in a more stably ordered way below.
1586044eb2f6SDimitry Andric auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1587d8e91e46SDimitry Andric llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1588044eb2f6SDimitry Andric return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1589044eb2f6SDimitry Andric ExitLoopMap.lookup(RHS)->getLoopDepth();
1590044eb2f6SDimitry Andric });
1591044eb2f6SDimitry Andric
1592044eb2f6SDimitry Andric // Populate the existing ExitLoopMap with everything reachable from each
1593044eb2f6SDimitry Andric // exit, starting from the inner most exit.
1594044eb2f6SDimitry Andric while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1595044eb2f6SDimitry Andric assert(Worklist.empty() && "Didn't clear worklist!");
1596044eb2f6SDimitry Andric
1597044eb2f6SDimitry Andric BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1598044eb2f6SDimitry Andric Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1599044eb2f6SDimitry Andric
1600044eb2f6SDimitry Andric // Walk the CFG back until we hit the cloned PH adding everything reachable
1601044eb2f6SDimitry Andric // and in the unlooped set to this exit block's loop.
1602044eb2f6SDimitry Andric Worklist.push_back(ExitBB);
1603044eb2f6SDimitry Andric do {
1604044eb2f6SDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
1605044eb2f6SDimitry Andric // We can stop recursing at the cloned preheader (if we get there).
1606044eb2f6SDimitry Andric if (BB == ClonedPH)
1607044eb2f6SDimitry Andric continue;
1608044eb2f6SDimitry Andric
1609044eb2f6SDimitry Andric for (BasicBlock *PredBB : predecessors(BB)) {
1610044eb2f6SDimitry Andric // If this pred has already been moved to our set or is part of some
1611044eb2f6SDimitry Andric // (inner) loop, no update needed.
1612044eb2f6SDimitry Andric if (!UnloopedBlockSet.erase(PredBB)) {
1613044eb2f6SDimitry Andric assert(
1614044eb2f6SDimitry Andric (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1615044eb2f6SDimitry Andric "Predecessor not mapped to a loop!");
1616044eb2f6SDimitry Andric continue;
1617044eb2f6SDimitry Andric }
1618044eb2f6SDimitry Andric
1619044eb2f6SDimitry Andric // We just insert into the loop set here. We'll add these blocks to the
1620044eb2f6SDimitry Andric // exit loop after we build up the set in an order that doesn't rely on
1621044eb2f6SDimitry Andric // predecessor order (which in turn relies on use list order).
1622044eb2f6SDimitry Andric bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1623044eb2f6SDimitry Andric (void)Inserted;
1624044eb2f6SDimitry Andric assert(Inserted && "Should only visit an unlooped block once!");
1625044eb2f6SDimitry Andric
1626044eb2f6SDimitry Andric // And recurse through to its predecessors.
1627044eb2f6SDimitry Andric Worklist.push_back(PredBB);
1628044eb2f6SDimitry Andric }
1629044eb2f6SDimitry Andric } while (!Worklist.empty());
1630044eb2f6SDimitry Andric }
1631044eb2f6SDimitry Andric
1632044eb2f6SDimitry Andric // Now that the ExitLoopMap gives as mapping for all the non-looping cloned
1633044eb2f6SDimitry Andric // blocks to their outer loops, walk the cloned blocks and the cloned exits
1634044eb2f6SDimitry Andric // in their original order adding them to the correct loop.
1635044eb2f6SDimitry Andric
1636044eb2f6SDimitry Andric // We need a stable insertion order. We use the order of the original loop
1637044eb2f6SDimitry Andric // order and map into the correct parent loop.
1638044eb2f6SDimitry Andric for (auto *BB : llvm::concat<BasicBlock *const>(
1639e3b55780SDimitry Andric ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1640044eb2f6SDimitry Andric if (Loop *OuterL = ExitLoopMap.lookup(BB))
1641044eb2f6SDimitry Andric OuterL->addBasicBlockToLoop(BB, LI);
1642044eb2f6SDimitry Andric
1643044eb2f6SDimitry Andric #ifndef NDEBUG
1644044eb2f6SDimitry Andric for (auto &BBAndL : ExitLoopMap) {
1645044eb2f6SDimitry Andric auto *BB = BBAndL.first;
1646044eb2f6SDimitry Andric auto *OuterL = BBAndL.second;
1647044eb2f6SDimitry Andric assert(LI.getLoopFor(BB) == OuterL &&
1648044eb2f6SDimitry Andric "Failed to put all blocks into outer loops!");
1649044eb2f6SDimitry Andric }
1650044eb2f6SDimitry Andric #endif
1651044eb2f6SDimitry Andric
1652044eb2f6SDimitry Andric // Now that all the blocks are placed into the correct containing loop in the
1653044eb2f6SDimitry Andric // absence of child loops, find all the potentially cloned child loops and
1654044eb2f6SDimitry Andric // clone them into whatever outer loop we placed their header into.
1655044eb2f6SDimitry Andric for (Loop *ChildL : OrigL) {
1656044eb2f6SDimitry Andric auto *ClonedChildHeader =
1657044eb2f6SDimitry Andric cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1658044eb2f6SDimitry Andric if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1659044eb2f6SDimitry Andric continue;
1660044eb2f6SDimitry Andric
1661044eb2f6SDimitry Andric #ifndef NDEBUG
1662044eb2f6SDimitry Andric for (auto *ChildLoopBB : ChildL->blocks())
1663044eb2f6SDimitry Andric assert(VMap.count(ChildLoopBB) &&
1664044eb2f6SDimitry Andric "Cloned a child loop header but not all of that loops blocks!");
1665044eb2f6SDimitry Andric #endif
1666044eb2f6SDimitry Andric
1667044eb2f6SDimitry Andric NonChildClonedLoops.push_back(cloneLoopNest(
1668044eb2f6SDimitry Andric *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1669044eb2f6SDimitry Andric }
1670044eb2f6SDimitry Andric }
1671044eb2f6SDimitry Andric
1672eb11fae6SDimitry Andric static void
deleteDeadClonedBlocks(Loop & L,ArrayRef<BasicBlock * > ExitBlocks,ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,DominatorTree & DT,MemorySSAUpdater * MSSAU)1673eb11fae6SDimitry Andric deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1674eb11fae6SDimitry Andric ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1675d8e91e46SDimitry Andric DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1676eb11fae6SDimitry Andric // Find all the dead clones, and remove them from their successors.
1677eb11fae6SDimitry Andric SmallVector<BasicBlock *, 16> DeadBlocks;
1678eb11fae6SDimitry Andric for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1679e3b55780SDimitry Andric for (const auto &VMap : VMaps)
1680eb11fae6SDimitry Andric if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1681eb11fae6SDimitry Andric if (!DT.isReachableFromEntry(ClonedBB)) {
1682eb11fae6SDimitry Andric for (BasicBlock *SuccBB : successors(ClonedBB))
1683eb11fae6SDimitry Andric SuccBB->removePredecessor(ClonedBB);
1684eb11fae6SDimitry Andric DeadBlocks.push_back(ClonedBB);
1685eb11fae6SDimitry Andric }
1686eb11fae6SDimitry Andric
1687d8e91e46SDimitry Andric // Remove all MemorySSA in the dead blocks
1688d8e91e46SDimitry Andric if (MSSAU) {
1689e6d15924SDimitry Andric SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1690d8e91e46SDimitry Andric DeadBlocks.end());
1691d8e91e46SDimitry Andric MSSAU->removeBlocks(DeadBlockSet);
1692d8e91e46SDimitry Andric }
1693d8e91e46SDimitry Andric
1694eb11fae6SDimitry Andric // Drop any remaining references to break cycles.
1695eb11fae6SDimitry Andric for (BasicBlock *BB : DeadBlocks)
1696eb11fae6SDimitry Andric BB->dropAllReferences();
1697eb11fae6SDimitry Andric // Erase them from the IR.
1698eb11fae6SDimitry Andric for (BasicBlock *BB : DeadBlocks)
1699eb11fae6SDimitry Andric BB->eraseFromParent();
1700eb11fae6SDimitry Andric }
1701eb11fae6SDimitry Andric
deleteDeadBlocksFromLoop(Loop & L,SmallVectorImpl<BasicBlock * > & ExitBlocks,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU,ScalarEvolution * SE,LPMUpdater & LoopUpdater)1702b1c73532SDimitry Andric static void deleteDeadBlocksFromLoop(Loop &L,
1703044eb2f6SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks,
1704d8e91e46SDimitry Andric DominatorTree &DT, LoopInfo &LI,
1705c0981da4SDimitry Andric MemorySSAUpdater *MSSAU,
1706e3b55780SDimitry Andric ScalarEvolution *SE,
1707b1c73532SDimitry Andric LPMUpdater &LoopUpdater) {
1708d8e91e46SDimitry Andric // Find all the dead blocks tied to this loop, and remove them from their
1709d8e91e46SDimitry Andric // successors.
1710e6d15924SDimitry Andric SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1711d8e91e46SDimitry Andric
1712d8e91e46SDimitry Andric // Start with loop/exit blocks and get a transitive closure of reachable dead
1713d8e91e46SDimitry Andric // blocks.
1714d8e91e46SDimitry Andric SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1715d8e91e46SDimitry Andric ExitBlocks.end());
1716d8e91e46SDimitry Andric DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1717d8e91e46SDimitry Andric while (!DeathCandidates.empty()) {
1718d8e91e46SDimitry Andric auto *BB = DeathCandidates.pop_back_val();
1719d8e91e46SDimitry Andric if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1720d8e91e46SDimitry Andric for (BasicBlock *SuccBB : successors(BB)) {
1721eb11fae6SDimitry Andric SuccBB->removePredecessor(BB);
1722d8e91e46SDimitry Andric DeathCandidates.push_back(SuccBB);
1723d8e91e46SDimitry Andric }
1724d8e91e46SDimitry Andric DeadBlockSet.insert(BB);
1725d8e91e46SDimitry Andric }
1726044eb2f6SDimitry Andric }
1727044eb2f6SDimitry Andric
1728d8e91e46SDimitry Andric // Remove all MemorySSA in the dead blocks
1729d8e91e46SDimitry Andric if (MSSAU)
1730d8e91e46SDimitry Andric MSSAU->removeBlocks(DeadBlockSet);
1731eb11fae6SDimitry Andric
1732044eb2f6SDimitry Andric // Filter out the dead blocks from the exit blocks list so that it can be
1733044eb2f6SDimitry Andric // used in the caller.
1734044eb2f6SDimitry Andric llvm::erase_if(ExitBlocks,
1735eb11fae6SDimitry Andric [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1736044eb2f6SDimitry Andric
1737044eb2f6SDimitry Andric // Walk from this loop up through its parents removing all of the dead blocks.
1738044eb2f6SDimitry Andric for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1739d8e91e46SDimitry Andric for (auto *BB : DeadBlockSet)
1740044eb2f6SDimitry Andric ParentL->getBlocksSet().erase(BB);
1741044eb2f6SDimitry Andric llvm::erase_if(ParentL->getBlocksVector(),
1742eb11fae6SDimitry Andric [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1743044eb2f6SDimitry Andric }
1744044eb2f6SDimitry Andric
1745044eb2f6SDimitry Andric // Now delete the dead child loops. This raw delete will clear them
1746044eb2f6SDimitry Andric // recursively.
1747044eb2f6SDimitry Andric llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1748eb11fae6SDimitry Andric if (!DeadBlockSet.count(ChildL->getHeader()))
1749044eb2f6SDimitry Andric return false;
1750044eb2f6SDimitry Andric
1751044eb2f6SDimitry Andric assert(llvm::all_of(ChildL->blocks(),
1752044eb2f6SDimitry Andric [&](BasicBlock *ChildBB) {
1753eb11fae6SDimitry Andric return DeadBlockSet.count(ChildBB);
1754044eb2f6SDimitry Andric }) &&
1755044eb2f6SDimitry Andric "If the child loop header is dead all blocks in the child loop must "
1756044eb2f6SDimitry Andric "be dead as well!");
1757b1c73532SDimitry Andric LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName());
1758e3b55780SDimitry Andric if (SE)
1759e3b55780SDimitry Andric SE->forgetBlockAndLoopDispositions();
1760044eb2f6SDimitry Andric LI.destroy(ChildL);
1761044eb2f6SDimitry Andric return true;
1762044eb2f6SDimitry Andric });
1763044eb2f6SDimitry Andric
1764eb11fae6SDimitry Andric // Remove the loop mappings for the dead blocks and drop all the references
1765eb11fae6SDimitry Andric // from these blocks to others to handle cyclic references as we start
1766eb11fae6SDimitry Andric // deleting the blocks themselves.
1767d8e91e46SDimitry Andric for (auto *BB : DeadBlockSet) {
1768eb11fae6SDimitry Andric // Check that the dominator tree has already been updated.
1769eb11fae6SDimitry Andric assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1770044eb2f6SDimitry Andric LI.changeLoopFor(BB, nullptr);
1771cfca06d7SDimitry Andric // Drop all uses of the instructions to make sure we won't have dangling
1772cfca06d7SDimitry Andric // uses in other blocks.
1773cfca06d7SDimitry Andric for (auto &I : *BB)
1774cfca06d7SDimitry Andric if (!I.use_empty())
1775145449b1SDimitry Andric I.replaceAllUsesWith(PoisonValue::get(I.getType()));
1776044eb2f6SDimitry Andric BB->dropAllReferences();
1777044eb2f6SDimitry Andric }
1778eb11fae6SDimitry Andric
1779eb11fae6SDimitry Andric // Actually delete the blocks now that they've been fully unhooked from the
1780eb11fae6SDimitry Andric // IR.
1781d8e91e46SDimitry Andric for (auto *BB : DeadBlockSet)
1782eb11fae6SDimitry Andric BB->eraseFromParent();
1783044eb2f6SDimitry Andric }
1784044eb2f6SDimitry Andric
1785044eb2f6SDimitry Andric /// Recompute the set of blocks in a loop after unswitching.
1786044eb2f6SDimitry Andric ///
1787044eb2f6SDimitry Andric /// This walks from the original headers predecessors to rebuild the loop. We
1788044eb2f6SDimitry Andric /// take advantage of the fact that new blocks can't have been added, and so we
1789044eb2f6SDimitry Andric /// filter by the original loop's blocks. This also handles potentially
1790044eb2f6SDimitry Andric /// unreachable code that we don't want to explore but might be found examining
1791044eb2f6SDimitry Andric /// the predecessors of the header.
1792044eb2f6SDimitry Andric ///
1793044eb2f6SDimitry Andric /// If the original loop is no longer a loop, this will return an empty set. If
1794044eb2f6SDimitry Andric /// it remains a loop, all the blocks within it will be added to the set
1795044eb2f6SDimitry Andric /// (including those blocks in inner loops).
recomputeLoopBlockSet(Loop & L,LoopInfo & LI)1796044eb2f6SDimitry Andric static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1797044eb2f6SDimitry Andric LoopInfo &LI) {
1798044eb2f6SDimitry Andric SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1799044eb2f6SDimitry Andric
1800044eb2f6SDimitry Andric auto *PH = L.getLoopPreheader();
1801044eb2f6SDimitry Andric auto *Header = L.getHeader();
1802044eb2f6SDimitry Andric
1803044eb2f6SDimitry Andric // A worklist to use while walking backwards from the header.
1804044eb2f6SDimitry Andric SmallVector<BasicBlock *, 16> Worklist;
1805044eb2f6SDimitry Andric
1806044eb2f6SDimitry Andric // First walk the predecessors of the header to find the backedges. This will
1807044eb2f6SDimitry Andric // form the basis of our walk.
1808044eb2f6SDimitry Andric for (auto *Pred : predecessors(Header)) {
1809044eb2f6SDimitry Andric // Skip the preheader.
1810044eb2f6SDimitry Andric if (Pred == PH)
1811044eb2f6SDimitry Andric continue;
1812044eb2f6SDimitry Andric
1813044eb2f6SDimitry Andric // Because the loop was in simplified form, the only non-loop predecessor
1814044eb2f6SDimitry Andric // is the preheader.
1815044eb2f6SDimitry Andric assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1816044eb2f6SDimitry Andric "than the preheader that is not part of the "
1817044eb2f6SDimitry Andric "loop!");
1818044eb2f6SDimitry Andric
1819044eb2f6SDimitry Andric // Insert this block into the loop set and on the first visit and, if it
1820044eb2f6SDimitry Andric // isn't the header we're currently walking, put it into the worklist to
1821044eb2f6SDimitry Andric // recurse through.
1822044eb2f6SDimitry Andric if (LoopBlockSet.insert(Pred).second && Pred != Header)
1823044eb2f6SDimitry Andric Worklist.push_back(Pred);
1824044eb2f6SDimitry Andric }
1825044eb2f6SDimitry Andric
1826044eb2f6SDimitry Andric // If no backedges were found, we're done.
1827044eb2f6SDimitry Andric if (LoopBlockSet.empty())
1828044eb2f6SDimitry Andric return LoopBlockSet;
1829044eb2f6SDimitry Andric
1830044eb2f6SDimitry Andric // We found backedges, recurse through them to identify the loop blocks.
1831044eb2f6SDimitry Andric while (!Worklist.empty()) {
1832044eb2f6SDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
1833044eb2f6SDimitry Andric assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1834044eb2f6SDimitry Andric
1835eb11fae6SDimitry Andric // No need to walk past the header.
1836eb11fae6SDimitry Andric if (BB == Header)
1837eb11fae6SDimitry Andric continue;
1838eb11fae6SDimitry Andric
1839044eb2f6SDimitry Andric // Because we know the inner loop structure remains valid we can use the
1840044eb2f6SDimitry Andric // loop structure to jump immediately across the entire nested loop.
1841044eb2f6SDimitry Andric // Further, because it is in loop simplified form, we can directly jump
1842044eb2f6SDimitry Andric // to its preheader afterward.
1843044eb2f6SDimitry Andric if (Loop *InnerL = LI.getLoopFor(BB))
1844044eb2f6SDimitry Andric if (InnerL != &L) {
1845044eb2f6SDimitry Andric assert(L.contains(InnerL) &&
1846044eb2f6SDimitry Andric "Should not reach a loop *outside* this loop!");
1847044eb2f6SDimitry Andric // The preheader is the only possible predecessor of the loop so
1848044eb2f6SDimitry Andric // insert it into the set and check whether it was already handled.
1849044eb2f6SDimitry Andric auto *InnerPH = InnerL->getLoopPreheader();
1850044eb2f6SDimitry Andric assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1851044eb2f6SDimitry Andric "but not contain the inner loop "
1852044eb2f6SDimitry Andric "preheader!");
1853044eb2f6SDimitry Andric if (!LoopBlockSet.insert(InnerPH).second)
1854044eb2f6SDimitry Andric // The only way to reach the preheader is through the loop body
1855044eb2f6SDimitry Andric // itself so if it has been visited the loop is already handled.
1856044eb2f6SDimitry Andric continue;
1857044eb2f6SDimitry Andric
1858044eb2f6SDimitry Andric // Insert all of the blocks (other than those already present) into
1859eb11fae6SDimitry Andric // the loop set. We expect at least the block that led us to find the
1860eb11fae6SDimitry Andric // inner loop to be in the block set, but we may also have other loop
1861eb11fae6SDimitry Andric // blocks if they were already enqueued as predecessors of some other
1862eb11fae6SDimitry Andric // outer loop block.
1863044eb2f6SDimitry Andric for (auto *InnerBB : InnerL->blocks()) {
1864044eb2f6SDimitry Andric if (InnerBB == BB) {
1865044eb2f6SDimitry Andric assert(LoopBlockSet.count(InnerBB) &&
1866044eb2f6SDimitry Andric "Block should already be in the set!");
1867044eb2f6SDimitry Andric continue;
1868044eb2f6SDimitry Andric }
1869044eb2f6SDimitry Andric
1870eb11fae6SDimitry Andric LoopBlockSet.insert(InnerBB);
1871044eb2f6SDimitry Andric }
1872044eb2f6SDimitry Andric
1873044eb2f6SDimitry Andric // Add the preheader to the worklist so we will continue past the
1874044eb2f6SDimitry Andric // loop body.
1875044eb2f6SDimitry Andric Worklist.push_back(InnerPH);
1876044eb2f6SDimitry Andric continue;
1877044eb2f6SDimitry Andric }
1878044eb2f6SDimitry Andric
1879044eb2f6SDimitry Andric // Insert any predecessors that were in the original loop into the new
1880044eb2f6SDimitry Andric // set, and if the insert is successful, add them to the worklist.
1881044eb2f6SDimitry Andric for (auto *Pred : predecessors(BB))
1882044eb2f6SDimitry Andric if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1883044eb2f6SDimitry Andric Worklist.push_back(Pred);
1884044eb2f6SDimitry Andric }
1885044eb2f6SDimitry Andric
1886eb11fae6SDimitry Andric assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1887eb11fae6SDimitry Andric
1888044eb2f6SDimitry Andric // We've found all the blocks participating in the loop, return our completed
1889044eb2f6SDimitry Andric // set.
1890044eb2f6SDimitry Andric return LoopBlockSet;
1891044eb2f6SDimitry Andric }
1892044eb2f6SDimitry Andric
1893044eb2f6SDimitry Andric /// Rebuild a loop after unswitching removes some subset of blocks and edges.
1894044eb2f6SDimitry Andric ///
1895044eb2f6SDimitry Andric /// The removal may have removed some child loops entirely but cannot have
1896044eb2f6SDimitry Andric /// disturbed any remaining child loops. However, they may need to be hoisted
1897044eb2f6SDimitry Andric /// to the parent loop (or to be top-level loops). The original loop may be
1898044eb2f6SDimitry Andric /// completely removed.
1899044eb2f6SDimitry Andric ///
1900044eb2f6SDimitry Andric /// The sibling loops resulting from this update are returned. If the original
1901044eb2f6SDimitry Andric /// loop remains a valid loop, it will be the first entry in this list with all
1902044eb2f6SDimitry Andric /// of the newly sibling loops following it.
1903044eb2f6SDimitry Andric ///
1904044eb2f6SDimitry Andric /// Returns true if the loop remains a loop after unswitching, and false if it
1905044eb2f6SDimitry Andric /// is no longer a loop after unswitching (and should not continue to be
1906044eb2f6SDimitry Andric /// referenced).
rebuildLoopAfterUnswitch(Loop & L,ArrayRef<BasicBlock * > ExitBlocks,LoopInfo & LI,SmallVectorImpl<Loop * > & HoistedLoops,ScalarEvolution * SE)1907044eb2f6SDimitry Andric static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1908044eb2f6SDimitry Andric LoopInfo &LI,
1909e3b55780SDimitry Andric SmallVectorImpl<Loop *> &HoistedLoops,
1910e3b55780SDimitry Andric ScalarEvolution *SE) {
1911044eb2f6SDimitry Andric auto *PH = L.getLoopPreheader();
1912044eb2f6SDimitry Andric
1913044eb2f6SDimitry Andric // Compute the actual parent loop from the exit blocks. Because we may have
1914044eb2f6SDimitry Andric // pruned some exits the loop may be different from the original parent.
1915044eb2f6SDimitry Andric Loop *ParentL = nullptr;
1916044eb2f6SDimitry Andric SmallVector<Loop *, 4> ExitLoops;
1917044eb2f6SDimitry Andric SmallVector<BasicBlock *, 4> ExitsInLoops;
1918044eb2f6SDimitry Andric ExitsInLoops.reserve(ExitBlocks.size());
1919044eb2f6SDimitry Andric for (auto *ExitBB : ExitBlocks)
1920044eb2f6SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1921044eb2f6SDimitry Andric ExitLoops.push_back(ExitL);
1922044eb2f6SDimitry Andric ExitsInLoops.push_back(ExitBB);
1923044eb2f6SDimitry Andric if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1924044eb2f6SDimitry Andric ParentL = ExitL;
1925044eb2f6SDimitry Andric }
1926044eb2f6SDimitry Andric
1927044eb2f6SDimitry Andric // Recompute the blocks participating in this loop. This may be empty if it
1928044eb2f6SDimitry Andric // is no longer a loop.
1929044eb2f6SDimitry Andric auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1930044eb2f6SDimitry Andric
1931044eb2f6SDimitry Andric // If we still have a loop, we need to re-set the loop's parent as the exit
1932044eb2f6SDimitry Andric // block set changing may have moved it within the loop nest. Note that this
1933044eb2f6SDimitry Andric // can only happen when this loop has a parent as it can only hoist the loop
1934044eb2f6SDimitry Andric // *up* the nest.
1935044eb2f6SDimitry Andric if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1936044eb2f6SDimitry Andric // Remove this loop's (original) blocks from all of the intervening loops.
1937044eb2f6SDimitry Andric for (Loop *IL = L.getParentLoop(); IL != ParentL;
1938044eb2f6SDimitry Andric IL = IL->getParentLoop()) {
1939044eb2f6SDimitry Andric IL->getBlocksSet().erase(PH);
1940044eb2f6SDimitry Andric for (auto *BB : L.blocks())
1941044eb2f6SDimitry Andric IL->getBlocksSet().erase(BB);
1942044eb2f6SDimitry Andric llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1943044eb2f6SDimitry Andric return BB == PH || L.contains(BB);
1944044eb2f6SDimitry Andric });
1945044eb2f6SDimitry Andric }
1946044eb2f6SDimitry Andric
1947044eb2f6SDimitry Andric LI.changeLoopFor(PH, ParentL);
1948044eb2f6SDimitry Andric L.getParentLoop()->removeChildLoop(&L);
1949044eb2f6SDimitry Andric if (ParentL)
1950044eb2f6SDimitry Andric ParentL->addChildLoop(&L);
1951044eb2f6SDimitry Andric else
1952044eb2f6SDimitry Andric LI.addTopLevelLoop(&L);
1953044eb2f6SDimitry Andric }
1954044eb2f6SDimitry Andric
1955044eb2f6SDimitry Andric // Now we update all the blocks which are no longer within the loop.
1956044eb2f6SDimitry Andric auto &Blocks = L.getBlocksVector();
1957044eb2f6SDimitry Andric auto BlocksSplitI =
1958044eb2f6SDimitry Andric LoopBlockSet.empty()
1959044eb2f6SDimitry Andric ? Blocks.begin()
1960044eb2f6SDimitry Andric : std::stable_partition(
1961044eb2f6SDimitry Andric Blocks.begin(), Blocks.end(),
1962044eb2f6SDimitry Andric [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1963044eb2f6SDimitry Andric
1964044eb2f6SDimitry Andric // Before we erase the list of unlooped blocks, build a set of them.
1965044eb2f6SDimitry Andric SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1966044eb2f6SDimitry Andric if (LoopBlockSet.empty())
1967044eb2f6SDimitry Andric UnloopedBlocks.insert(PH);
1968044eb2f6SDimitry Andric
1969044eb2f6SDimitry Andric // Now erase these blocks from the loop.
1970044eb2f6SDimitry Andric for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1971044eb2f6SDimitry Andric L.getBlocksSet().erase(BB);
1972044eb2f6SDimitry Andric Blocks.erase(BlocksSplitI, Blocks.end());
1973044eb2f6SDimitry Andric
1974044eb2f6SDimitry Andric // Sort the exits in ascending loop depth, we'll work backwards across these
1975044eb2f6SDimitry Andric // to process them inside out.
1976e6d15924SDimitry Andric llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1977044eb2f6SDimitry Andric return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1978044eb2f6SDimitry Andric });
1979044eb2f6SDimitry Andric
1980044eb2f6SDimitry Andric // We'll build up a set for each exit loop.
1981044eb2f6SDimitry Andric SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1982044eb2f6SDimitry Andric Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1983044eb2f6SDimitry Andric
1984044eb2f6SDimitry Andric auto RemoveUnloopedBlocksFromLoop =
1985044eb2f6SDimitry Andric [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1986044eb2f6SDimitry Andric for (auto *BB : UnloopedBlocks)
1987044eb2f6SDimitry Andric L.getBlocksSet().erase(BB);
1988044eb2f6SDimitry Andric llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1989044eb2f6SDimitry Andric return UnloopedBlocks.count(BB);
1990044eb2f6SDimitry Andric });
1991044eb2f6SDimitry Andric };
1992044eb2f6SDimitry Andric
1993044eb2f6SDimitry Andric SmallVector<BasicBlock *, 16> Worklist;
1994044eb2f6SDimitry Andric while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1995044eb2f6SDimitry Andric assert(Worklist.empty() && "Didn't clear worklist!");
1996044eb2f6SDimitry Andric assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1997044eb2f6SDimitry Andric
1998044eb2f6SDimitry Andric // Grab the next exit block, in decreasing loop depth order.
1999044eb2f6SDimitry Andric BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
2000044eb2f6SDimitry Andric Loop &ExitL = *LI.getLoopFor(ExitBB);
2001044eb2f6SDimitry Andric assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
2002044eb2f6SDimitry Andric
2003044eb2f6SDimitry Andric // Erase all of the unlooped blocks from the loops between the previous
2004044eb2f6SDimitry Andric // exit loop and this exit loop. This works because the ExitInLoops list is
2005044eb2f6SDimitry Andric // sorted in increasing order of loop depth and thus we visit loops in
2006044eb2f6SDimitry Andric // decreasing order of loop depth.
2007044eb2f6SDimitry Andric for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
2008044eb2f6SDimitry Andric RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2009044eb2f6SDimitry Andric
2010044eb2f6SDimitry Andric // Walk the CFG back until we hit the cloned PH adding everything reachable
2011044eb2f6SDimitry Andric // and in the unlooped set to this exit block's loop.
2012044eb2f6SDimitry Andric Worklist.push_back(ExitBB);
2013044eb2f6SDimitry Andric do {
2014044eb2f6SDimitry Andric BasicBlock *BB = Worklist.pop_back_val();
2015044eb2f6SDimitry Andric // We can stop recursing at the cloned preheader (if we get there).
2016044eb2f6SDimitry Andric if (BB == PH)
2017044eb2f6SDimitry Andric continue;
2018044eb2f6SDimitry Andric
2019044eb2f6SDimitry Andric for (BasicBlock *PredBB : predecessors(BB)) {
2020044eb2f6SDimitry Andric // If this pred has already been moved to our set or is part of some
2021044eb2f6SDimitry Andric // (inner) loop, no update needed.
2022044eb2f6SDimitry Andric if (!UnloopedBlocks.erase(PredBB)) {
2023044eb2f6SDimitry Andric assert((NewExitLoopBlocks.count(PredBB) ||
2024044eb2f6SDimitry Andric ExitL.contains(LI.getLoopFor(PredBB))) &&
2025044eb2f6SDimitry Andric "Predecessor not in a nested loop (or already visited)!");
2026044eb2f6SDimitry Andric continue;
2027044eb2f6SDimitry Andric }
2028044eb2f6SDimitry Andric
2029044eb2f6SDimitry Andric // We just insert into the loop set here. We'll add these blocks to the
2030044eb2f6SDimitry Andric // exit loop after we build up the set in a deterministic order rather
2031044eb2f6SDimitry Andric // than the predecessor-influenced visit order.
2032044eb2f6SDimitry Andric bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
2033044eb2f6SDimitry Andric (void)Inserted;
2034044eb2f6SDimitry Andric assert(Inserted && "Should only visit an unlooped block once!");
2035044eb2f6SDimitry Andric
2036044eb2f6SDimitry Andric // And recurse through to its predecessors.
2037044eb2f6SDimitry Andric Worklist.push_back(PredBB);
2038044eb2f6SDimitry Andric }
2039044eb2f6SDimitry Andric } while (!Worklist.empty());
2040044eb2f6SDimitry Andric
2041044eb2f6SDimitry Andric // If blocks in this exit loop were directly part of the original loop (as
2042044eb2f6SDimitry Andric // opposed to a child loop) update the map to point to this exit loop. This
2043044eb2f6SDimitry Andric // just updates a map and so the fact that the order is unstable is fine.
2044044eb2f6SDimitry Andric for (auto *BB : NewExitLoopBlocks)
2045044eb2f6SDimitry Andric if (Loop *BBL = LI.getLoopFor(BB))
2046044eb2f6SDimitry Andric if (BBL == &L || !L.contains(BBL))
2047044eb2f6SDimitry Andric LI.changeLoopFor(BB, &ExitL);
2048044eb2f6SDimitry Andric
2049044eb2f6SDimitry Andric // We will remove the remaining unlooped blocks from this loop in the next
2050044eb2f6SDimitry Andric // iteration or below.
2051044eb2f6SDimitry Andric NewExitLoopBlocks.clear();
2052044eb2f6SDimitry Andric }
2053044eb2f6SDimitry Andric
2054044eb2f6SDimitry Andric // Any remaining unlooped blocks are no longer part of any loop unless they
2055044eb2f6SDimitry Andric // are part of some child loop.
2056044eb2f6SDimitry Andric for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
2057044eb2f6SDimitry Andric RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
2058044eb2f6SDimitry Andric for (auto *BB : UnloopedBlocks)
2059044eb2f6SDimitry Andric if (Loop *BBL = LI.getLoopFor(BB))
2060044eb2f6SDimitry Andric if (BBL == &L || !L.contains(BBL))
2061044eb2f6SDimitry Andric LI.changeLoopFor(BB, nullptr);
2062044eb2f6SDimitry Andric
2063044eb2f6SDimitry Andric // Sink all the child loops whose headers are no longer in the loop set to
2064044eb2f6SDimitry Andric // the parent (or to be top level loops). We reach into the loop and directly
2065044eb2f6SDimitry Andric // update its subloop vector to make this batch update efficient.
2066044eb2f6SDimitry Andric auto &SubLoops = L.getSubLoopsVector();
2067044eb2f6SDimitry Andric auto SubLoopsSplitI =
2068044eb2f6SDimitry Andric LoopBlockSet.empty()
2069044eb2f6SDimitry Andric ? SubLoops.begin()
2070044eb2f6SDimitry Andric : std::stable_partition(
2071044eb2f6SDimitry Andric SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
2072044eb2f6SDimitry Andric return LoopBlockSet.count(SubL->getHeader());
2073044eb2f6SDimitry Andric });
2074044eb2f6SDimitry Andric for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
2075044eb2f6SDimitry Andric HoistedLoops.push_back(HoistedL);
2076044eb2f6SDimitry Andric HoistedL->setParentLoop(nullptr);
2077044eb2f6SDimitry Andric
2078044eb2f6SDimitry Andric // To compute the new parent of this hoisted loop we look at where we
2079044eb2f6SDimitry Andric // placed the preheader above. We can't lookup the header itself because we
2080044eb2f6SDimitry Andric // retained the mapping from the header to the hoisted loop. But the
2081044eb2f6SDimitry Andric // preheader and header should have the exact same new parent computed
2082044eb2f6SDimitry Andric // based on the set of exit blocks from the original loop as the preheader
2083044eb2f6SDimitry Andric // is a predecessor of the header and so reached in the reverse walk. And
2084044eb2f6SDimitry Andric // because the loops were all in simplified form the preheader of the
2085044eb2f6SDimitry Andric // hoisted loop can't be part of some *other* loop.
2086044eb2f6SDimitry Andric if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
2087044eb2f6SDimitry Andric NewParentL->addChildLoop(HoistedL);
2088044eb2f6SDimitry Andric else
2089044eb2f6SDimitry Andric LI.addTopLevelLoop(HoistedL);
2090044eb2f6SDimitry Andric }
2091044eb2f6SDimitry Andric SubLoops.erase(SubLoopsSplitI, SubLoops.end());
2092044eb2f6SDimitry Andric
2093044eb2f6SDimitry Andric // Actually delete the loop if nothing remained within it.
2094044eb2f6SDimitry Andric if (Blocks.empty()) {
2095044eb2f6SDimitry Andric assert(SubLoops.empty() &&
2096044eb2f6SDimitry Andric "Failed to remove all subloops from the original loop!");
2097044eb2f6SDimitry Andric if (Loop *ParentL = L.getParentLoop())
2098044eb2f6SDimitry Andric ParentL->removeChildLoop(llvm::find(*ParentL, &L));
2099044eb2f6SDimitry Andric else
2100044eb2f6SDimitry Andric LI.removeLoop(llvm::find(LI, &L));
2101b1c73532SDimitry Andric // markLoopAsDeleted for L should be triggered by the caller (it is
2102b1c73532SDimitry Andric // typically done within postUnswitch).
2103e3b55780SDimitry Andric if (SE)
2104e3b55780SDimitry Andric SE->forgetBlockAndLoopDispositions();
2105044eb2f6SDimitry Andric LI.destroy(&L);
2106044eb2f6SDimitry Andric return false;
2107044eb2f6SDimitry Andric }
2108044eb2f6SDimitry Andric
2109044eb2f6SDimitry Andric return true;
2110044eb2f6SDimitry Andric }
2111044eb2f6SDimitry Andric
2112044eb2f6SDimitry Andric /// Helper to visit a dominator subtree, invoking a callable on each node.
2113044eb2f6SDimitry Andric ///
2114044eb2f6SDimitry Andric /// Returning false at any point will stop walking past that node of the tree.
2115044eb2f6SDimitry Andric template <typename CallableT>
visitDomSubTree(DominatorTree & DT,BasicBlock * BB,CallableT Callable)2116044eb2f6SDimitry Andric void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2117044eb2f6SDimitry Andric SmallVector<DomTreeNode *, 4> DomWorklist;
2118044eb2f6SDimitry Andric DomWorklist.push_back(DT[BB]);
2119044eb2f6SDimitry Andric #ifndef NDEBUG
2120044eb2f6SDimitry Andric SmallPtrSet<DomTreeNode *, 4> Visited;
2121044eb2f6SDimitry Andric Visited.insert(DT[BB]);
2122044eb2f6SDimitry Andric #endif
2123044eb2f6SDimitry Andric do {
2124044eb2f6SDimitry Andric DomTreeNode *N = DomWorklist.pop_back_val();
2125044eb2f6SDimitry Andric
2126044eb2f6SDimitry Andric // Visit this node.
2127044eb2f6SDimitry Andric if (!Callable(N->getBlock()))
2128044eb2f6SDimitry Andric continue;
2129044eb2f6SDimitry Andric
2130044eb2f6SDimitry Andric // Accumulate the child nodes.
2131044eb2f6SDimitry Andric for (DomTreeNode *ChildN : *N) {
2132044eb2f6SDimitry Andric assert(Visited.insert(ChildN).second &&
2133044eb2f6SDimitry Andric "Cannot visit a node twice when walking a tree!");
2134044eb2f6SDimitry Andric DomWorklist.push_back(ChildN);
2135044eb2f6SDimitry Andric }
2136044eb2f6SDimitry Andric } while (!DomWorklist.empty());
2137044eb2f6SDimitry Andric }
2138044eb2f6SDimitry Andric
postUnswitch(Loop & L,LPMUpdater & U,StringRef LoopName,bool CurrentLoopValid,bool PartiallyInvariant,bool InjectedCondition,ArrayRef<Loop * > NewLoops)2139b1c73532SDimitry Andric void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName,
2140b1c73532SDimitry Andric bool CurrentLoopValid, bool PartiallyInvariant,
2141b1c73532SDimitry Andric bool InjectedCondition, ArrayRef<Loop *> NewLoops) {
2142b1c73532SDimitry Andric // If we did a non-trivial unswitch, we have added new (cloned) loops.
2143b1c73532SDimitry Andric if (!NewLoops.empty())
2144b1c73532SDimitry Andric U.addSiblingLoops(NewLoops);
2145b1c73532SDimitry Andric
2146b1c73532SDimitry Andric // If the current loop remains valid, we should revisit it to catch any
2147b1c73532SDimitry Andric // other unswitch opportunities. Otherwise, we need to mark it as deleted.
2148b1c73532SDimitry Andric if (CurrentLoopValid) {
2149b1c73532SDimitry Andric if (PartiallyInvariant) {
2150b1c73532SDimitry Andric // Mark the new loop as partially unswitched, to avoid unswitching on
2151b1c73532SDimitry Andric // the same condition again.
2152b1c73532SDimitry Andric auto &Context = L.getHeader()->getContext();
2153b1c73532SDimitry Andric MDNode *DisableUnswitchMD = MDNode::get(
2154b1c73532SDimitry Andric Context,
2155b1c73532SDimitry Andric MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
2156b1c73532SDimitry Andric MDNode *NewLoopID = makePostTransformationMetadata(
2157b1c73532SDimitry Andric Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
2158b1c73532SDimitry Andric {DisableUnswitchMD});
2159b1c73532SDimitry Andric L.setLoopID(NewLoopID);
2160b1c73532SDimitry Andric } else if (InjectedCondition) {
2161b1c73532SDimitry Andric // Do the same for injection of invariant conditions.
2162b1c73532SDimitry Andric auto &Context = L.getHeader()->getContext();
2163b1c73532SDimitry Andric MDNode *DisableUnswitchMD = MDNode::get(
2164b1c73532SDimitry Andric Context,
2165b1c73532SDimitry Andric MDString::get(Context, "llvm.loop.unswitch.injection.disable"));
2166b1c73532SDimitry Andric MDNode *NewLoopID = makePostTransformationMetadata(
2167b1c73532SDimitry Andric Context, L.getLoopID(), {"llvm.loop.unswitch.injection"},
2168b1c73532SDimitry Andric {DisableUnswitchMD});
2169b1c73532SDimitry Andric L.setLoopID(NewLoopID);
2170b1c73532SDimitry Andric } else
2171b1c73532SDimitry Andric U.revisitCurrentLoop();
2172b1c73532SDimitry Andric } else
2173b1c73532SDimitry Andric U.markLoopAsDeleted(L, LoopName);
2174b1c73532SDimitry Andric }
2175b1c73532SDimitry Andric
unswitchNontrivialInvariants(Loop & L,Instruction & TI,ArrayRef<Value * > Invariants,IVConditionInfo & PartialIVInfo,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,ScalarEvolution * SE,MemorySSAUpdater * MSSAU,LPMUpdater & LoopUpdater,bool InsertFreeze,bool InjectedCondition)2176d8e91e46SDimitry Andric static void unswitchNontrivialInvariants(
2177d8e91e46SDimitry Andric Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2178e3b55780SDimitry Andric IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
2179b1c73532SDimitry Andric AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2180b1c73532SDimitry Andric LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) {
2181eb11fae6SDimitry Andric auto *ParentBB = TI.getParent();
2182eb11fae6SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(&TI);
2183eb11fae6SDimitry Andric SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2184044eb2f6SDimitry Andric
2185b1c73532SDimitry Andric // Save the current loop name in a variable so that we can report it even
2186b1c73532SDimitry Andric // after it has been deleted.
2187b1c73532SDimitry Andric std::string LoopName(L.getName());
2188b1c73532SDimitry Andric
2189eb11fae6SDimitry Andric // We can only unswitch switches, conditional branches with an invariant
2190344a3780SDimitry Andric // condition, or combining invariant conditions with an instruction or
2191344a3780SDimitry Andric // partially invariant instructions.
21921d5ae102SDimitry Andric assert((SI || (BI && BI->isConditional())) &&
2193eb11fae6SDimitry Andric "Can only unswitch switches and conditional branch!");
2194344a3780SDimitry Andric bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2195344a3780SDimitry Andric bool FullUnswitch =
2196145449b1SDimitry Andric SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] &&
2197145449b1SDimitry Andric !PartiallyInvariant);
2198eb11fae6SDimitry Andric if (FullUnswitch)
2199eb11fae6SDimitry Andric assert(Invariants.size() == 1 &&
2200eb11fae6SDimitry Andric "Cannot have other invariants with full unswitching!");
2201eb11fae6SDimitry Andric else
2202145449b1SDimitry Andric assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) &&
2203eb11fae6SDimitry Andric "Partial unswitching requires an instruction as the condition!");
2204044eb2f6SDimitry Andric
2205d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
2206d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2207d8e91e46SDimitry Andric
2208eb11fae6SDimitry Andric // Constant and BBs tracking the cloned and continuing successor. When we are
2209eb11fae6SDimitry Andric // unswitching the entire condition, this can just be trivially chosen to
2210eb11fae6SDimitry Andric // unswitch towards `true`. However, when we are unswitching a set of
2211344a3780SDimitry Andric // invariants combined with `and` or `or` or partially invariant instructions,
2212344a3780SDimitry Andric // the combining operation determines the best direction to unswitch: we want
2213344a3780SDimitry Andric // to unswitch the direction that will collapse the branch.
2214eb11fae6SDimitry Andric bool Direction = true;
2215eb11fae6SDimitry Andric int ClonedSucc = 0;
2216eb11fae6SDimitry Andric if (!FullUnswitch) {
2217145449b1SDimitry Andric Value *Cond = skipTrivialSelect(BI->getCondition());
2218344a3780SDimitry Andric (void)Cond;
2219344a3780SDimitry Andric assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
2220344a3780SDimitry Andric PartiallyInvariant) &&
2221344a3780SDimitry Andric "Only `or`, `and`, an `select`, partially invariant instructions "
2222344a3780SDimitry Andric "can combine invariants being unswitched.");
2223145449b1SDimitry Andric if (!match(Cond, m_LogicalOr())) {
2224145449b1SDimitry Andric if (match(Cond, m_LogicalAnd()) ||
2225344a3780SDimitry Andric (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2226eb11fae6SDimitry Andric Direction = false;
2227eb11fae6SDimitry Andric ClonedSucc = 1;
2228eb11fae6SDimitry Andric }
2229eb11fae6SDimitry Andric }
2230344a3780SDimitry Andric }
2231eb11fae6SDimitry Andric
2232eb11fae6SDimitry Andric BasicBlock *RetainedSuccBB =
2233eb11fae6SDimitry Andric BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2234eb11fae6SDimitry Andric SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2235eb11fae6SDimitry Andric if (BI)
2236eb11fae6SDimitry Andric UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2237eb11fae6SDimitry Andric else
2238eb11fae6SDimitry Andric for (auto Case : SI->cases())
2239eb11fae6SDimitry Andric if (Case.getCaseSuccessor() != RetainedSuccBB)
2240eb11fae6SDimitry Andric UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2241eb11fae6SDimitry Andric
2242eb11fae6SDimitry Andric assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2243eb11fae6SDimitry Andric "Should not unswitch the same successor we are retaining!");
2244044eb2f6SDimitry Andric
2245044eb2f6SDimitry Andric // The branch should be in this exact loop. Any inner loop's invariant branch
2246044eb2f6SDimitry Andric // should be handled by unswitching that inner loop. The caller of this
2247044eb2f6SDimitry Andric // routine should filter out any candidates that remain (but were skipped for
2248044eb2f6SDimitry Andric // whatever reason).
2249044eb2f6SDimitry Andric assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2250044eb2f6SDimitry Andric
2251044eb2f6SDimitry Andric // Compute the parent loop now before we start hacking on things.
2252044eb2f6SDimitry Andric Loop *ParentL = L.getParentLoop();
2253d8e91e46SDimitry Andric // Get blocks in RPO order for MSSA update, before changing the CFG.
2254d8e91e46SDimitry Andric LoopBlocksRPO LBRPO(&L);
2255d8e91e46SDimitry Andric if (MSSAU)
2256d8e91e46SDimitry Andric LBRPO.perform(&LI);
2257044eb2f6SDimitry Andric
2258044eb2f6SDimitry Andric // Compute the outer-most loop containing one of our exit blocks. This is the
2259044eb2f6SDimitry Andric // furthest up our loopnest which can be mutated, which we will use below to
2260044eb2f6SDimitry Andric // update things.
2261044eb2f6SDimitry Andric Loop *OuterExitL = &L;
2262e3b55780SDimitry Andric SmallVector<BasicBlock *, 4> ExitBlocks;
2263e3b55780SDimitry Andric L.getUniqueExitBlocks(ExitBlocks);
2264044eb2f6SDimitry Andric for (auto *ExitBB : ExitBlocks) {
22657fa27ce4SDimitry Andric // ExitBB can be an exit block for several levels in the loop nest. Make
22667fa27ce4SDimitry Andric // sure we find the top most.
22677fa27ce4SDimitry Andric Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
2268044eb2f6SDimitry Andric if (!NewOuterExitL) {
2269044eb2f6SDimitry Andric // We exited the entire nest with this block, so we're done.
2270044eb2f6SDimitry Andric OuterExitL = nullptr;
2271044eb2f6SDimitry Andric break;
2272044eb2f6SDimitry Andric }
2273044eb2f6SDimitry Andric if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2274044eb2f6SDimitry Andric OuterExitL = NewOuterExitL;
2275044eb2f6SDimitry Andric }
2276044eb2f6SDimitry Andric
2277eb11fae6SDimitry Andric // At this point, we're definitely going to unswitch something so invalidate
2278eb11fae6SDimitry Andric // any cached information in ScalarEvolution for the outer most loop
2279eb11fae6SDimitry Andric // containing an exit block and all nested loops.
2280eb11fae6SDimitry Andric if (SE) {
2281eb11fae6SDimitry Andric if (OuterExitL)
2282eb11fae6SDimitry Andric SE->forgetLoop(OuterExitL);
2283eb11fae6SDimitry Andric else
2284eb11fae6SDimitry Andric SE->forgetTopmostLoop(&L);
2285e3b55780SDimitry Andric SE->forgetBlockAndLoopDispositions();
2286044eb2f6SDimitry Andric }
2287eb11fae6SDimitry Andric
2288eb11fae6SDimitry Andric // If the edge from this terminator to a successor dominates that successor,
2289eb11fae6SDimitry Andric // store a map from each block in its dominator subtree to it. This lets us
2290eb11fae6SDimitry Andric // tell when cloning for a particular successor if a block is dominated by
2291eb11fae6SDimitry Andric // some *other* successor with a single data structure. We use this to
2292eb11fae6SDimitry Andric // significantly reduce cloning.
2293eb11fae6SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2294e3b55780SDimitry Andric for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
2295e3b55780SDimitry Andric UnswitchedSuccBBs))
2296eb11fae6SDimitry Andric if (SuccBB->getUniquePredecessor() ||
2297eb11fae6SDimitry Andric llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2298eb11fae6SDimitry Andric return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2299eb11fae6SDimitry Andric }))
2300eb11fae6SDimitry Andric visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2301eb11fae6SDimitry Andric DominatingSucc[BB] = SuccBB;
2302eb11fae6SDimitry Andric return true;
2303044eb2f6SDimitry Andric });
2304044eb2f6SDimitry Andric
2305044eb2f6SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert
2306044eb2f6SDimitry Andric // the conditional branch. We will change the preheader to have a conditional
2307044eb2f6SDimitry Andric // branch on LoopCond. The original preheader will become the split point
2308044eb2f6SDimitry Andric // between the unswitched versions, and we will have a new preheader for the
2309044eb2f6SDimitry Andric // original loop.
2310044eb2f6SDimitry Andric BasicBlock *SplitBB = L.getLoopPreheader();
2311d8e91e46SDimitry Andric BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2312044eb2f6SDimitry Andric
2313eb11fae6SDimitry Andric // Keep track of the dominator tree updates needed.
2314eb11fae6SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2315044eb2f6SDimitry Andric
2316eb11fae6SDimitry Andric // Clone the loop for each unswitched successor.
2317eb11fae6SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2318eb11fae6SDimitry Andric VMaps.reserve(UnswitchedSuccBBs.size());
2319eb11fae6SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2320eb11fae6SDimitry Andric for (auto *SuccBB : UnswitchedSuccBBs) {
2321eb11fae6SDimitry Andric VMaps.emplace_back(new ValueToValueMapTy());
2322eb11fae6SDimitry Andric ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2323eb11fae6SDimitry Andric L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2324e3b55780SDimitry Andric DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
2325eb11fae6SDimitry Andric }
2326eb11fae6SDimitry Andric
2327b60736ecSDimitry Andric // Drop metadata if we may break its semantics by moving this instr into the
2328b60736ecSDimitry Andric // split block.
2329b60736ecSDimitry Andric if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2330b60736ecSDimitry Andric if (DropNonTrivialImplicitNullChecks)
2331b60736ecSDimitry Andric // Do not spend time trying to understand if we can keep it, just drop it
2332b60736ecSDimitry Andric // to save compile time.
2333b60736ecSDimitry Andric TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2334b60736ecSDimitry Andric else {
2335b60736ecSDimitry Andric // It is only legal to preserve make.implicit metadata if we are
2336b60736ecSDimitry Andric // guaranteed no reach implicit null check after following this branch.
2337b60736ecSDimitry Andric ICFLoopSafetyInfo SafetyInfo;
2338b60736ecSDimitry Andric SafetyInfo.computeLoopSafetyInfo(&L);
2339b60736ecSDimitry Andric if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2340b60736ecSDimitry Andric TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2341b60736ecSDimitry Andric }
2342b60736ecSDimitry Andric }
2343b60736ecSDimitry Andric
2344eb11fae6SDimitry Andric // The stitching of the branched code back together depends on whether we're
2345eb11fae6SDimitry Andric // doing full unswitching or not with the exception that we always want to
2346eb11fae6SDimitry Andric // nuke the initial terminator placed in the split block.
2347eb11fae6SDimitry Andric SplitBB->getTerminator()->eraseFromParent();
2348eb11fae6SDimitry Andric if (FullUnswitch) {
2349d8e91e46SDimitry Andric // Keep a clone of the terminator for MSSA updates.
2350d8e91e46SDimitry Andric Instruction *NewTI = TI.clone();
2351e3b55780SDimitry Andric NewTI->insertInto(ParentBB, ParentBB->end());
2352d8e91e46SDimitry Andric
2353ac9a064cSDimitry Andric // Splice the terminator from the original loop and rewrite its
2354ac9a064cSDimitry Andric // successors.
2355ac9a064cSDimitry Andric TI.moveBefore(*SplitBB, SplitBB->end());
2356ac9a064cSDimitry Andric TI.dropLocation();
2357ac9a064cSDimitry Andric
2358d8e91e46SDimitry Andric // First wire up the moved terminator to the preheaders.
2359d8e91e46SDimitry Andric if (BI) {
2360d8e91e46SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2361d8e91e46SDimitry Andric BI->setSuccessor(ClonedSucc, ClonedPH);
2362d8e91e46SDimitry Andric BI->setSuccessor(1 - ClonedSucc, LoopPH);
23637fa27ce4SDimitry Andric Value *Cond = skipTrivialSelect(BI->getCondition());
2364ac9a064cSDimitry Andric if (InsertFreeze) {
2365ac9a064cSDimitry Andric // We don't give any debug location to the new freeze, because the
2366ac9a064cSDimitry Andric // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
2367ac9a064cSDimitry Andric // out of the loop.
2368ac9a064cSDimitry Andric Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
2369ac9a064cSDimitry Andric }
23707fa27ce4SDimitry Andric BI->setCondition(Cond);
2371d8e91e46SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2372d8e91e46SDimitry Andric } else {
2373d8e91e46SDimitry Andric assert(SI && "Must either be a branch or switch!");
2374d8e91e46SDimitry Andric
2375d8e91e46SDimitry Andric // Walk the cases and directly update their successors.
2376d8e91e46SDimitry Andric assert(SI->getDefaultDest() == RetainedSuccBB &&
2377d8e91e46SDimitry Andric "Not retaining default successor!");
2378d8e91e46SDimitry Andric SI->setDefaultDest(LoopPH);
2379e3b55780SDimitry Andric for (const auto &Case : SI->cases())
2380d8e91e46SDimitry Andric if (Case.getCaseSuccessor() == RetainedSuccBB)
2381d8e91e46SDimitry Andric Case.setSuccessor(LoopPH);
2382d8e91e46SDimitry Andric else
2383d8e91e46SDimitry Andric Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2384d8e91e46SDimitry Andric
2385e3b55780SDimitry Andric if (InsertFreeze)
2386ac9a064cSDimitry Andric SI->setCondition(new FreezeInst(SI->getCondition(),
2387ac9a064cSDimitry Andric SI->getCondition()->getName() + ".fr",
2388ac9a064cSDimitry Andric SI->getIterator()));
2389e3b55780SDimitry Andric
2390d8e91e46SDimitry Andric // We need to use the set to populate domtree updates as even when there
2391d8e91e46SDimitry Andric // are multiple cases pointing at the same successor we only want to
2392d8e91e46SDimitry Andric // remove and insert one edge in the domtree.
2393d8e91e46SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2394d8e91e46SDimitry Andric DTUpdates.push_back(
2395d8e91e46SDimitry Andric {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2396d8e91e46SDimitry Andric }
2397d8e91e46SDimitry Andric
2398d8e91e46SDimitry Andric if (MSSAU) {
2399d8e91e46SDimitry Andric DT.applyUpdates(DTUpdates);
2400d8e91e46SDimitry Andric DTUpdates.clear();
2401d8e91e46SDimitry Andric
2402d8e91e46SDimitry Andric // Remove all but one edge to the retained block and all unswitched
2403d8e91e46SDimitry Andric // blocks. This is to avoid having duplicate entries in the cloned Phis,
2404d8e91e46SDimitry Andric // when we know we only keep a single edge for each case.
2405d8e91e46SDimitry Andric MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2406d8e91e46SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2407d8e91e46SDimitry Andric MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2408d8e91e46SDimitry Andric
2409d8e91e46SDimitry Andric for (auto &VMap : VMaps)
2410d8e91e46SDimitry Andric MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2411d8e91e46SDimitry Andric /*IgnoreIncomingWithNoClones=*/true);
2412d8e91e46SDimitry Andric MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2413d8e91e46SDimitry Andric
2414d8e91e46SDimitry Andric // Remove all edges to unswitched blocks.
2415d8e91e46SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2416d8e91e46SDimitry Andric MSSAU->removeEdge(ParentBB, SuccBB);
2417d8e91e46SDimitry Andric }
2418d8e91e46SDimitry Andric
2419d8e91e46SDimitry Andric // Now unhook the successor relationship as we'll be replacing
2420eb11fae6SDimitry Andric // the terminator with a direct branch. This is much simpler for branches
2421eb11fae6SDimitry Andric // than switches so we handle those first.
2422eb11fae6SDimitry Andric if (BI) {
2423eb11fae6SDimitry Andric // Remove the parent as a predecessor of the unswitched successor.
2424eb11fae6SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 &&
2425eb11fae6SDimitry Andric "Only one possible unswitched block for a branch!");
2426eb11fae6SDimitry Andric BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2427eb11fae6SDimitry Andric UnswitchedSuccBB->removePredecessor(ParentBB,
2428e6d15924SDimitry Andric /*KeepOneInputPHIs*/ true);
2429eb11fae6SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2430eb11fae6SDimitry Andric } else {
2431eb11fae6SDimitry Andric // Note that we actually want to remove the parent block as a predecessor
2432eb11fae6SDimitry Andric // of *every* case successor. The case successor is either unswitched,
2433eb11fae6SDimitry Andric // completely eliminating an edge from the parent to that successor, or it
2434eb11fae6SDimitry Andric // is a duplicate edge to the retained successor as the retained successor
2435eb11fae6SDimitry Andric // is always the default successor and as we'll replace this with a direct
2436eb11fae6SDimitry Andric // branch we no longer need the duplicate entries in the PHI nodes.
2437d8e91e46SDimitry Andric SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2438d8e91e46SDimitry Andric assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2439eb11fae6SDimitry Andric "Not retaining default successor!");
2440e3b55780SDimitry Andric for (const auto &Case : NewSI->cases())
2441eb11fae6SDimitry Andric Case.getCaseSuccessor()->removePredecessor(
2442eb11fae6SDimitry Andric ParentBB,
2443e6d15924SDimitry Andric /*KeepOneInputPHIs*/ true);
2444eb11fae6SDimitry Andric
2445eb11fae6SDimitry Andric // We need to use the set to populate domtree updates as even when there
2446eb11fae6SDimitry Andric // are multiple cases pointing at the same successor we only want to
2447eb11fae6SDimitry Andric // remove and insert one edge in the domtree.
2448eb11fae6SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2449eb11fae6SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2450eb11fae6SDimitry Andric }
2451eb11fae6SDimitry Andric
2452eb11fae6SDimitry Andric // Create a new unconditional branch to the continuing block (as opposed to
2453eb11fae6SDimitry Andric // the one cloned).
2454ac9a064cSDimitry Andric Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
2455ac9a064cSDimitry Andric NewBI->setDebugLoc(NewTI->getDebugLoc());
2456ac9a064cSDimitry Andric
2457ac9a064cSDimitry Andric // After MSSAU update, remove the cloned terminator instruction NewTI.
2458ac9a064cSDimitry Andric NewTI->eraseFromParent();
2459eb11fae6SDimitry Andric } else {
2460eb11fae6SDimitry Andric assert(BI && "Only branches have partial unswitching.");
2461eb11fae6SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 &&
2462eb11fae6SDimitry Andric "Only one possible unswitched block for a branch!");
2463eb11fae6SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2464eb11fae6SDimitry Andric // When doing a partial unswitch, we have to do a bit more work to build up
2465eb11fae6SDimitry Andric // the branch in the split block.
2466344a3780SDimitry Andric if (PartiallyInvariant)
2467344a3780SDimitry Andric buildPartialInvariantUnswitchConditionalBranch(
2468344a3780SDimitry Andric *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2469145449b1SDimitry Andric else {
2470145449b1SDimitry Andric buildPartialUnswitchConditionalBranch(
2471145449b1SDimitry Andric *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2472145449b1SDimitry Andric FreezeLoopUnswitchCond, BI, &AC, DT);
2473145449b1SDimitry Andric }
2474eb11fae6SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
24751d5ae102SDimitry Andric
24761d5ae102SDimitry Andric if (MSSAU) {
24771d5ae102SDimitry Andric DT.applyUpdates(DTUpdates);
24781d5ae102SDimitry Andric DTUpdates.clear();
24791d5ae102SDimitry Andric
24801d5ae102SDimitry Andric // Perform MSSA cloning updates.
24811d5ae102SDimitry Andric for (auto &VMap : VMaps)
24821d5ae102SDimitry Andric MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
24831d5ae102SDimitry Andric /*IgnoreIncomingWithNoClones=*/true);
24841d5ae102SDimitry Andric MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
24851d5ae102SDimitry Andric }
2486eb11fae6SDimitry Andric }
2487eb11fae6SDimitry Andric
2488eb11fae6SDimitry Andric // Apply the updates accumulated above to get an up-to-date dominator tree.
2489eb11fae6SDimitry Andric DT.applyUpdates(DTUpdates);
2490eb11fae6SDimitry Andric
2491eb11fae6SDimitry Andric // Now that we have an accurate dominator tree, first delete the dead cloned
2492eb11fae6SDimitry Andric // blocks so that we can accurately build any cloned loops. It is important to
2493eb11fae6SDimitry Andric // not delete the blocks from the original loop yet because we still want to
2494eb11fae6SDimitry Andric // reference the original loop to understand the cloned loop's structure.
2495d8e91e46SDimitry Andric deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2496044eb2f6SDimitry Andric
2497044eb2f6SDimitry Andric // Build the cloned loop structure itself. This may be substantially
2498044eb2f6SDimitry Andric // different from the original structure due to the simplified CFG. This also
2499044eb2f6SDimitry Andric // handles inserting all the cloned blocks into the correct loops.
2500044eb2f6SDimitry Andric SmallVector<Loop *, 4> NonChildClonedLoops;
2501eb11fae6SDimitry Andric for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2502eb11fae6SDimitry Andric buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2503044eb2f6SDimitry Andric
2504eb11fae6SDimitry Andric // Now that our cloned loops have been built, we can update the original loop.
2505eb11fae6SDimitry Andric // First we delete the dead blocks from it and then we rebuild the loop
2506eb11fae6SDimitry Andric // structure taking these deletions into account.
2507b1c73532SDimitry Andric deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater);
2508d8e91e46SDimitry Andric
2509d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
2510d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2511d8e91e46SDimitry Andric
2512044eb2f6SDimitry Andric SmallVector<Loop *, 4> HoistedLoops;
2513e3b55780SDimitry Andric bool IsStillLoop =
2514e3b55780SDimitry Andric rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
2515044eb2f6SDimitry Andric
2516d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
2517d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2518d8e91e46SDimitry Andric
2519eb11fae6SDimitry Andric // This transformation has a high risk of corrupting the dominator tree, and
2520eb11fae6SDimitry Andric // the below steps to rebuild loop structures will result in hard to debug
2521eb11fae6SDimitry Andric // errors in that case so verify that the dominator tree is sane first.
2522eb11fae6SDimitry Andric // FIXME: Remove this when the bugs stop showing up and rely on existing
2523eb11fae6SDimitry Andric // verification steps.
2524eb11fae6SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2525eb11fae6SDimitry Andric
2526344a3780SDimitry Andric if (BI && !PartiallyInvariant) {
2527eb11fae6SDimitry Andric // If we unswitched a branch which collapses the condition to a known
2528eb11fae6SDimitry Andric // constant we want to replace all the uses of the invariants within both
2529eb11fae6SDimitry Andric // the original and cloned blocks. We do this here so that we can use the
2530eb11fae6SDimitry Andric // now updated dominator tree to identify which side the users are on.
2531eb11fae6SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 &&
2532eb11fae6SDimitry Andric "Only one possible unswitched block for a branch!");
2533eb11fae6SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2534d8e91e46SDimitry Andric
2535d8e91e46SDimitry Andric // When considering multiple partially-unswitched invariants
2536d8e91e46SDimitry Andric // we cant just go replace them with constants in both branches.
2537d8e91e46SDimitry Andric //
2538d8e91e46SDimitry Andric // For 'AND' we infer that true branch ("continue") means true
2539d8e91e46SDimitry Andric // for each invariant operand.
2540d8e91e46SDimitry Andric // For 'OR' we can infer that false branch ("continue") means false
2541d8e91e46SDimitry Andric // for each invariant operand.
2542d8e91e46SDimitry Andric // So it happens that for multiple-partial case we dont replace
2543d8e91e46SDimitry Andric // in the unswitched branch.
2544344a3780SDimitry Andric bool ReplaceUnswitched =
2545344a3780SDimitry Andric FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2546d8e91e46SDimitry Andric
2547eb11fae6SDimitry Andric ConstantInt *UnswitchedReplacement =
2548eb11fae6SDimitry Andric Direction ? ConstantInt::getTrue(BI->getContext())
2549eb11fae6SDimitry Andric : ConstantInt::getFalse(BI->getContext());
2550eb11fae6SDimitry Andric ConstantInt *ContinueReplacement =
2551eb11fae6SDimitry Andric Direction ? ConstantInt::getFalse(BI->getContext())
2552eb11fae6SDimitry Andric : ConstantInt::getTrue(BI->getContext());
2553c0981da4SDimitry Andric for (Value *Invariant : Invariants) {
2554c0981da4SDimitry Andric assert(!isa<Constant>(Invariant) &&
2555c0981da4SDimitry Andric "Should not be replacing constant values!");
2556344a3780SDimitry Andric // Use make_early_inc_range here as set invalidates the iterator.
2557344a3780SDimitry Andric for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2558344a3780SDimitry Andric Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2559eb11fae6SDimitry Andric if (!UserI)
2560eb11fae6SDimitry Andric continue;
2561eb11fae6SDimitry Andric
2562eb11fae6SDimitry Andric // Replace it with the 'continue' side if in the main loop body, and the
2563eb11fae6SDimitry Andric // unswitched if in the cloned blocks.
2564eb11fae6SDimitry Andric if (DT.dominates(LoopPH, UserI->getParent()))
2565344a3780SDimitry Andric U.set(ContinueReplacement);
2566d8e91e46SDimitry Andric else if (ReplaceUnswitched &&
2567d8e91e46SDimitry Andric DT.dominates(ClonedPH, UserI->getParent()))
2568344a3780SDimitry Andric U.set(UnswitchedReplacement);
2569eb11fae6SDimitry Andric }
2570eb11fae6SDimitry Andric }
2571c0981da4SDimitry Andric }
2572044eb2f6SDimitry Andric
2573044eb2f6SDimitry Andric // We can change which blocks are exit blocks of all the cloned sibling
2574044eb2f6SDimitry Andric // loops, the current loop, and any parent loops which shared exit blocks
2575044eb2f6SDimitry Andric // with the current loop. As a consequence, we need to re-form LCSSA for
2576044eb2f6SDimitry Andric // them. But we shouldn't need to re-form LCSSA for any child loops.
2577044eb2f6SDimitry Andric // FIXME: This could be made more efficient by tracking which exit blocks are
2578044eb2f6SDimitry Andric // new, and focusing on them, but that isn't likely to be necessary.
2579044eb2f6SDimitry Andric //
2580044eb2f6SDimitry Andric // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2581044eb2f6SDimitry Andric // loop nest and update every loop that could have had its exits changed. We
2582044eb2f6SDimitry Andric // also need to cover any intervening loops. We add all of these loops to
2583044eb2f6SDimitry Andric // a list and sort them by loop depth to achieve this without updating
2584044eb2f6SDimitry Andric // unnecessary loops.
2585eb11fae6SDimitry Andric auto UpdateLoop = [&](Loop &UpdateL) {
2586044eb2f6SDimitry Andric #ifndef NDEBUG
2587eb11fae6SDimitry Andric UpdateL.verifyLoop();
2588eb11fae6SDimitry Andric for (Loop *ChildL : UpdateL) {
2589eb11fae6SDimitry Andric ChildL->verifyLoop();
2590044eb2f6SDimitry Andric assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&
2591044eb2f6SDimitry Andric "Perturbed a child loop's LCSSA form!");
2592eb11fae6SDimitry Andric }
2593044eb2f6SDimitry Andric #endif
2594044eb2f6SDimitry Andric // First build LCSSA for this loop so that we can preserve it when
2595044eb2f6SDimitry Andric // forming dedicated exits. We don't want to perturb some other loop's
2596044eb2f6SDimitry Andric // LCSSA while doing that CFG edit.
2597706b4fc4SDimitry Andric formLCSSA(UpdateL, DT, &LI, SE);
2598044eb2f6SDimitry Andric
2599044eb2f6SDimitry Andric // For loops reached by this loop's original exit blocks we may
2600044eb2f6SDimitry Andric // introduced new, non-dedicated exits. At least try to re-form dedicated
2601044eb2f6SDimitry Andric // exits for these loops. This may fail if they couldn't have dedicated
2602044eb2f6SDimitry Andric // exits to start with.
2603e6d15924SDimitry Andric formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2604eb11fae6SDimitry Andric };
2605eb11fae6SDimitry Andric
2606eb11fae6SDimitry Andric // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2607eb11fae6SDimitry Andric // and we can do it in any order as they don't nest relative to each other.
2608eb11fae6SDimitry Andric //
2609eb11fae6SDimitry Andric // Also check if any of the loops we have updated have become top-level loops
2610eb11fae6SDimitry Andric // as that will necessitate widening the outer loop scope.
2611eb11fae6SDimitry Andric for (Loop *UpdatedL :
2612eb11fae6SDimitry Andric llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2613eb11fae6SDimitry Andric UpdateLoop(*UpdatedL);
2614b60736ecSDimitry Andric if (UpdatedL->isOutermost())
2615eb11fae6SDimitry Andric OuterExitL = nullptr;
2616044eb2f6SDimitry Andric }
2617eb11fae6SDimitry Andric if (IsStillLoop) {
2618eb11fae6SDimitry Andric UpdateLoop(L);
2619b60736ecSDimitry Andric if (L.isOutermost())
2620eb11fae6SDimitry Andric OuterExitL = nullptr;
2621044eb2f6SDimitry Andric }
2622044eb2f6SDimitry Andric
2623eb11fae6SDimitry Andric // If the original loop had exit blocks, walk up through the outer most loop
2624eb11fae6SDimitry Andric // of those exit blocks to update LCSSA and form updated dedicated exits.
2625eb11fae6SDimitry Andric if (OuterExitL != &L)
2626eb11fae6SDimitry Andric for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2627eb11fae6SDimitry Andric OuterL = OuterL->getParentLoop())
2628eb11fae6SDimitry Andric UpdateLoop(*OuterL);
2629eb11fae6SDimitry Andric
2630044eb2f6SDimitry Andric #ifndef NDEBUG
2631044eb2f6SDimitry Andric // Verify the entire loop structure to catch any incorrect updates before we
2632044eb2f6SDimitry Andric // progress in the pass pipeline.
2633044eb2f6SDimitry Andric LI.verify(DT);
2634044eb2f6SDimitry Andric #endif
2635044eb2f6SDimitry Andric
2636044eb2f6SDimitry Andric // Now that we've unswitched something, make callbacks to report the changes.
2637044eb2f6SDimitry Andric // For that we need to merge together the updated loops and the cloned loops
2638044eb2f6SDimitry Andric // and check whether the original loop survived.
2639044eb2f6SDimitry Andric SmallVector<Loop *, 4> SibLoops;
2640044eb2f6SDimitry Andric for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2641044eb2f6SDimitry Andric if (UpdatedL->getParentLoop() == ParentL)
2642044eb2f6SDimitry Andric SibLoops.push_back(UpdatedL);
2643b1c73532SDimitry Andric postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant,
2644b1c73532SDimitry Andric InjectedCondition, SibLoops);
2645044eb2f6SDimitry Andric
2646d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
2647d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2648d8e91e46SDimitry Andric
2649d8e91e46SDimitry Andric if (BI)
2650044eb2f6SDimitry Andric ++NumBranches;
2651d8e91e46SDimitry Andric else
2652d8e91e46SDimitry Andric ++NumSwitches;
2653044eb2f6SDimitry Andric }
2654044eb2f6SDimitry Andric
2655044eb2f6SDimitry Andric /// Recursively compute the cost of a dominator subtree based on the per-block
2656044eb2f6SDimitry Andric /// cost map provided.
2657044eb2f6SDimitry Andric ///
2658044eb2f6SDimitry Andric /// The recursive computation is memozied into the provided DT-indexed cost map
2659044eb2f6SDimitry Andric /// to allow querying it for most nodes in the domtree without it becoming
2660044eb2f6SDimitry Andric /// quadratic.
computeDomSubtreeCost(DomTreeNode & N,const SmallDenseMap<BasicBlock *,InstructionCost,4> & BBCostMap,SmallDenseMap<DomTreeNode *,InstructionCost,4> & DTCostMap)2661344a3780SDimitry Andric static InstructionCost computeDomSubtreeCost(
2662344a3780SDimitry Andric DomTreeNode &N,
2663344a3780SDimitry Andric const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2664344a3780SDimitry Andric SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2665044eb2f6SDimitry Andric // Don't accumulate cost (or recurse through) blocks not in our block cost
2666044eb2f6SDimitry Andric // map and thus not part of the duplication cost being considered.
2667044eb2f6SDimitry Andric auto BBCostIt = BBCostMap.find(N.getBlock());
2668044eb2f6SDimitry Andric if (BBCostIt == BBCostMap.end())
2669044eb2f6SDimitry Andric return 0;
2670044eb2f6SDimitry Andric
2671044eb2f6SDimitry Andric // Lookup this node to see if we already computed its cost.
2672044eb2f6SDimitry Andric auto DTCostIt = DTCostMap.find(&N);
2673044eb2f6SDimitry Andric if (DTCostIt != DTCostMap.end())
2674044eb2f6SDimitry Andric return DTCostIt->second;
2675044eb2f6SDimitry Andric
2676044eb2f6SDimitry Andric // If not, we have to compute it. We can't use insert above and update
2677044eb2f6SDimitry Andric // because computing the cost may insert more things into the map.
2678344a3780SDimitry Andric InstructionCost Cost = std::accumulate(
2679344a3780SDimitry Andric N.begin(), N.end(), BBCostIt->second,
2680344a3780SDimitry Andric [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2681044eb2f6SDimitry Andric return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2682044eb2f6SDimitry Andric });
2683044eb2f6SDimitry Andric bool Inserted = DTCostMap.insert({&N, Cost}).second;
2684044eb2f6SDimitry Andric (void)Inserted;
2685044eb2f6SDimitry Andric assert(Inserted && "Should not insert a node while visiting children!");
2686044eb2f6SDimitry Andric return Cost;
2687044eb2f6SDimitry Andric }
2688044eb2f6SDimitry Andric
26897fa27ce4SDimitry Andric /// Turns a select instruction into implicit control flow branch,
26907fa27ce4SDimitry Andric /// making the following replacement:
26917fa27ce4SDimitry Andric ///
26927fa27ce4SDimitry Andric /// head:
26937fa27ce4SDimitry Andric /// --code before select--
26947fa27ce4SDimitry Andric /// select %cond, %trueval, %falseval
26957fa27ce4SDimitry Andric /// --code after select--
26967fa27ce4SDimitry Andric ///
26977fa27ce4SDimitry Andric /// into
26987fa27ce4SDimitry Andric ///
26997fa27ce4SDimitry Andric /// head:
27007fa27ce4SDimitry Andric /// --code before select--
27017fa27ce4SDimitry Andric /// br i1 %cond, label %then, label %tail
27027fa27ce4SDimitry Andric ///
27037fa27ce4SDimitry Andric /// then:
27047fa27ce4SDimitry Andric /// br %tail
27057fa27ce4SDimitry Andric ///
27067fa27ce4SDimitry Andric /// tail:
27077fa27ce4SDimitry Andric /// phi [ %trueval, %then ], [ %falseval, %head]
27087fa27ce4SDimitry Andric /// unreachable
27097fa27ce4SDimitry Andric ///
27107fa27ce4SDimitry Andric /// It also makes all relevant DT and LI updates, so that all structures are in
27117fa27ce4SDimitry Andric /// valid state after this transform.
turnSelectIntoBranch(SelectInst * SI,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU,AssumptionCache * AC)27127fa27ce4SDimitry Andric static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
27137fa27ce4SDimitry Andric LoopInfo &LI, MemorySSAUpdater *MSSAU,
27147fa27ce4SDimitry Andric AssumptionCache *AC) {
27157fa27ce4SDimitry Andric LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
27167fa27ce4SDimitry Andric BasicBlock *HeadBB = SI->getParent();
27177fa27ce4SDimitry Andric
27187fa27ce4SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
27197fa27ce4SDimitry Andric SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
27207fa27ce4SDimitry Andric SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
27217fa27ce4SDimitry Andric auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
27227fa27ce4SDimitry Andric BasicBlock *ThenBB = CondBr->getSuccessor(0),
27237fa27ce4SDimitry Andric *TailBB = CondBr->getSuccessor(1);
27247fa27ce4SDimitry Andric if (MSSAU)
27257fa27ce4SDimitry Andric MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
27267fa27ce4SDimitry Andric
2727ac9a064cSDimitry Andric PHINode *Phi =
2728ac9a064cSDimitry Andric PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
27297fa27ce4SDimitry Andric Phi->addIncoming(SI->getTrueValue(), ThenBB);
27307fa27ce4SDimitry Andric Phi->addIncoming(SI->getFalseValue(), HeadBB);
2731ac9a064cSDimitry Andric Phi->setDebugLoc(SI->getDebugLoc());
27327fa27ce4SDimitry Andric SI->replaceAllUsesWith(Phi);
27337fa27ce4SDimitry Andric SI->eraseFromParent();
27347fa27ce4SDimitry Andric
27357fa27ce4SDimitry Andric if (MSSAU && VerifyMemorySSA)
27367fa27ce4SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
27377fa27ce4SDimitry Andric
27387fa27ce4SDimitry Andric ++NumSelects;
27397fa27ce4SDimitry Andric return CondBr;
27407fa27ce4SDimitry Andric }
27417fa27ce4SDimitry Andric
2742d8e91e46SDimitry Andric /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2743d8e91e46SDimitry Andric /// making the following replacement:
2744d8e91e46SDimitry Andric ///
2745d8e91e46SDimitry Andric /// --code before guard--
2746d8e91e46SDimitry Andric /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2747d8e91e46SDimitry Andric /// --code after guard--
2748d8e91e46SDimitry Andric ///
2749d8e91e46SDimitry Andric /// into
2750d8e91e46SDimitry Andric ///
2751d8e91e46SDimitry Andric /// --code before guard--
2752d8e91e46SDimitry Andric /// br i1 %cond, label %guarded, label %deopt
2753d8e91e46SDimitry Andric ///
2754d8e91e46SDimitry Andric /// guarded:
2755d8e91e46SDimitry Andric /// --code after guard--
2756d8e91e46SDimitry Andric ///
2757d8e91e46SDimitry Andric /// deopt:
2758d8e91e46SDimitry Andric /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2759d8e91e46SDimitry Andric /// unreachable
2760d8e91e46SDimitry Andric ///
2761d8e91e46SDimitry Andric /// It also makes all relevant DT and LI updates, so that all structures are in
2762d8e91e46SDimitry Andric /// valid state after this transform.
turnGuardIntoBranch(IntrinsicInst * GI,Loop & L,DominatorTree & DT,LoopInfo & LI,MemorySSAUpdater * MSSAU)2763e3b55780SDimitry Andric static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2764e3b55780SDimitry Andric DominatorTree &DT, LoopInfo &LI,
2765e3b55780SDimitry Andric MemorySSAUpdater *MSSAU) {
2766d8e91e46SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2767d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
2768d8e91e46SDimitry Andric BasicBlock *CheckBB = GI->getParent();
2769d8e91e46SDimitry Andric
2770d8e91e46SDimitry Andric if (MSSAU && VerifyMemorySSA)
2771d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2772d8e91e46SDimitry Andric
27737fa27ce4SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
2774d8e91e46SDimitry Andric Instruction *DeoptBlockTerm =
27757fa27ce4SDimitry Andric SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
27767fa27ce4SDimitry Andric GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
2777d8e91e46SDimitry Andric BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2778d8e91e46SDimitry Andric // SplitBlockAndInsertIfThen inserts control flow that branches to
2779d8e91e46SDimitry Andric // DeoptBlockTerm if the condition is true. We want the opposite.
2780d8e91e46SDimitry Andric CheckBI->swapSuccessors();
2781d8e91e46SDimitry Andric
2782d8e91e46SDimitry Andric BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2783d8e91e46SDimitry Andric GuardedBlock->setName("guarded");
2784d8e91e46SDimitry Andric CheckBI->getSuccessor(1)->setName("deopt");
2785d8e91e46SDimitry Andric BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2786d8e91e46SDimitry Andric
2787d8e91e46SDimitry Andric if (MSSAU)
2788d8e91e46SDimitry Andric MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2789d8e91e46SDimitry Andric
2790d8e91e46SDimitry Andric GI->moveBefore(DeoptBlockTerm);
2791d8e91e46SDimitry Andric GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2792d8e91e46SDimitry Andric
2793d8e91e46SDimitry Andric if (MSSAU) {
2794d8e91e46SDimitry Andric MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2795706b4fc4SDimitry Andric MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2796d8e91e46SDimitry Andric if (VerifyMemorySSA)
2797d8e91e46SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
2798d8e91e46SDimitry Andric }
2799d8e91e46SDimitry Andric
28007fa27ce4SDimitry Andric if (VerifyLoopInfo)
28017fa27ce4SDimitry Andric LI.verify(DT);
2802d8e91e46SDimitry Andric ++NumGuards;
2803d8e91e46SDimitry Andric return CheckBI;
2804d8e91e46SDimitry Andric }
2805d8e91e46SDimitry Andric
2806d8e91e46SDimitry Andric /// Cost multiplier is a way to limit potentially exponential behavior
2807d8e91e46SDimitry Andric /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2808d8e91e46SDimitry Andric /// candidates available. Also accounting for the number of "sibling" loops with
2809d8e91e46SDimitry Andric /// the idea to account for previous unswitches that already happened on this
2810d8e91e46SDimitry Andric /// cluster of loops. There was an attempt to keep this formula simple,
2811d8e91e46SDimitry Andric /// just enough to limit the worst case behavior. Even if it is not that simple
2812d8e91e46SDimitry Andric /// now it is still not an attempt to provide a detailed heuristic size
2813d8e91e46SDimitry Andric /// prediction.
2814d8e91e46SDimitry Andric ///
2815d8e91e46SDimitry Andric /// TODO: Make a proper accounting of "explosion" effect for all kinds of
2816d8e91e46SDimitry Andric /// unswitch candidates, making adequate predictions instead of wild guesses.
2817d8e91e46SDimitry Andric /// That requires knowing not just the number of "remaining" candidates but
2818d8e91e46SDimitry Andric /// also costs of unswitching for each of these candidates.
CalculateUnswitchCostMultiplier(const Instruction & TI,const Loop & L,const LoopInfo & LI,const DominatorTree & DT,ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates)2819cfca06d7SDimitry Andric static int CalculateUnswitchCostMultiplier(
2820e3b55780SDimitry Andric const Instruction &TI, const Loop &L, const LoopInfo &LI,
2821e3b55780SDimitry Andric const DominatorTree &DT,
2822e3b55780SDimitry Andric ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
2823d8e91e46SDimitry Andric
2824d8e91e46SDimitry Andric // Guards and other exiting conditions do not contribute to exponential
2825d8e91e46SDimitry Andric // explosion as soon as they dominate the latch (otherwise there might be
2826d8e91e46SDimitry Andric // another path to the latch remaining that does not allow to eliminate the
2827d8e91e46SDimitry Andric // loop copy on unswitch).
2828e3b55780SDimitry Andric const BasicBlock *Latch = L.getLoopLatch();
2829e3b55780SDimitry Andric const BasicBlock *CondBlock = TI.getParent();
2830d8e91e46SDimitry Andric if (DT.dominates(CondBlock, Latch) &&
2831d8e91e46SDimitry Andric (isGuard(&TI) ||
28327fa27ce4SDimitry Andric (TI.isTerminator() &&
2833e3b55780SDimitry Andric llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
2834d8e91e46SDimitry Andric return L.contains(SuccBB);
28357fa27ce4SDimitry Andric }) <= 1))) {
2836d8e91e46SDimitry Andric NumCostMultiplierSkipped++;
2837d8e91e46SDimitry Andric return 1;
2838d8e91e46SDimitry Andric }
2839d8e91e46SDimitry Andric
2840d8e91e46SDimitry Andric auto *ParentL = L.getParentLoop();
2841d8e91e46SDimitry Andric int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2842d8e91e46SDimitry Andric : std::distance(LI.begin(), LI.end()));
2843d8e91e46SDimitry Andric // Count amount of clones that all the candidates might cause during
28447fa27ce4SDimitry Andric // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
28457fa27ce4SDimitry Andric // cases.
2846d8e91e46SDimitry Andric int UnswitchedClones = 0;
28477fa27ce4SDimitry Andric for (const auto &Candidate : UnswitchCandidates) {
2848e3b55780SDimitry Andric const Instruction *CI = Candidate.TI;
2849e3b55780SDimitry Andric const BasicBlock *CondBlock = CI->getParent();
2850d8e91e46SDimitry Andric bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
28517fa27ce4SDimitry Andric if (isa<SelectInst>(CI)) {
28527fa27ce4SDimitry Andric UnswitchedClones++;
28537fa27ce4SDimitry Andric continue;
28547fa27ce4SDimitry Andric }
2855d8e91e46SDimitry Andric if (isGuard(CI)) {
2856d8e91e46SDimitry Andric if (!SkipExitingSuccessors)
2857d8e91e46SDimitry Andric UnswitchedClones++;
2858d8e91e46SDimitry Andric continue;
2859d8e91e46SDimitry Andric }
2860e3b55780SDimitry Andric int NonExitingSuccessors =
2861e3b55780SDimitry Andric llvm::count_if(successors(CondBlock),
2862e3b55780SDimitry Andric [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
2863d8e91e46SDimitry Andric return !SkipExitingSuccessors || L.contains(SuccBB);
2864d8e91e46SDimitry Andric });
2865d8e91e46SDimitry Andric UnswitchedClones += Log2_32(NonExitingSuccessors);
2866d8e91e46SDimitry Andric }
2867d8e91e46SDimitry Andric
2868d8e91e46SDimitry Andric // Ignore up to the "unscaled candidates" number of unswitch candidates
2869d8e91e46SDimitry Andric // when calculating the power-of-two scaling of the cost. The main idea
2870d8e91e46SDimitry Andric // with this control is to allow a small number of unswitches to happen
2871d8e91e46SDimitry Andric // and rely more on siblings multiplier (see below) when the number
2872d8e91e46SDimitry Andric // of candidates is small.
2873d8e91e46SDimitry Andric unsigned ClonesPower =
2874d8e91e46SDimitry Andric std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2875d8e91e46SDimitry Andric
2876d8e91e46SDimitry Andric // Allowing top-level loops to spread a bit more than nested ones.
2877d8e91e46SDimitry Andric int SiblingsMultiplier =
2878d8e91e46SDimitry Andric std::max((ParentL ? SiblingsCount
2879d8e91e46SDimitry Andric : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2880d8e91e46SDimitry Andric 1);
2881d8e91e46SDimitry Andric // Compute the cost multiplier in a way that won't overflow by saturating
2882d8e91e46SDimitry Andric // at an upper bound.
2883d8e91e46SDimitry Andric int CostMultiplier;
2884d8e91e46SDimitry Andric if (ClonesPower > Log2_32(UnswitchThreshold) ||
2885d8e91e46SDimitry Andric SiblingsMultiplier > UnswitchThreshold)
2886d8e91e46SDimitry Andric CostMultiplier = UnswitchThreshold;
2887d8e91e46SDimitry Andric else
2888d8e91e46SDimitry Andric CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2889d8e91e46SDimitry Andric (int)UnswitchThreshold);
2890d8e91e46SDimitry Andric
2891d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier
2892d8e91e46SDimitry Andric << " (siblings " << SiblingsMultiplier << " * clones "
2893d8e91e46SDimitry Andric << (1 << ClonesPower) << ")"
2894d8e91e46SDimitry Andric << " for unswitch candidate: " << TI << "\n");
2895d8e91e46SDimitry Andric return CostMultiplier;
2896d8e91e46SDimitry Andric }
2897d8e91e46SDimitry Andric
collectUnswitchCandidates(SmallVectorImpl<NonTrivialUnswitchCandidate> & UnswitchCandidates,IVConditionInfo & PartialIVInfo,Instruction * & PartialIVCondBranch,const Loop & L,const LoopInfo & LI,AAResults & AA,const MemorySSAUpdater * MSSAU)2898e3b55780SDimitry Andric static bool collectUnswitchCandidates(
2899e3b55780SDimitry Andric SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
2900e3b55780SDimitry Andric IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
2901e3b55780SDimitry Andric const Loop &L, const LoopInfo &LI, AAResults &AA,
2902e3b55780SDimitry Andric const MemorySSAUpdater *MSSAU) {
2903e3b55780SDimitry Andric assert(UnswitchCandidates.empty() && "Should be!");
29047fa27ce4SDimitry Andric
29057fa27ce4SDimitry Andric auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
29067fa27ce4SDimitry Andric Cond = skipTrivialSelect(Cond);
29077fa27ce4SDimitry Andric if (isa<Constant>(Cond))
29087fa27ce4SDimitry Andric return;
29097fa27ce4SDimitry Andric if (L.isLoopInvariant(Cond)) {
29107fa27ce4SDimitry Andric UnswitchCandidates.push_back({I, {Cond}});
29117fa27ce4SDimitry Andric return;
29127fa27ce4SDimitry Andric }
29137fa27ce4SDimitry Andric if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
29147fa27ce4SDimitry Andric TinyPtrVector<Value *> Invariants =
29157fa27ce4SDimitry Andric collectHomogenousInstGraphLoopInvariants(
29167fa27ce4SDimitry Andric L, *static_cast<Instruction *>(Cond), LI);
29177fa27ce4SDimitry Andric if (!Invariants.empty())
29187fa27ce4SDimitry Andric UnswitchCandidates.push_back({I, std::move(Invariants)});
29197fa27ce4SDimitry Andric }
29207fa27ce4SDimitry Andric };
29217fa27ce4SDimitry Andric
2922d8e91e46SDimitry Andric // Whether or not we should also collect guards in the loop.
2923d8e91e46SDimitry Andric bool CollectGuards = false;
2924d8e91e46SDimitry Andric if (UnswitchGuards) {
2925d8e91e46SDimitry Andric auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2926d8e91e46SDimitry Andric Intrinsic::getName(Intrinsic::experimental_guard));
2927d8e91e46SDimitry Andric if (GuardDecl && !GuardDecl->use_empty())
2928d8e91e46SDimitry Andric CollectGuards = true;
2929d8e91e46SDimitry Andric }
2930d8e91e46SDimitry Andric
2931eb11fae6SDimitry Andric for (auto *BB : L.blocks()) {
2932eb11fae6SDimitry Andric if (LI.getLoopFor(BB) != &L)
2933eb11fae6SDimitry Andric continue;
2934a303c417SDimitry Andric
29357fa27ce4SDimitry Andric for (auto &I : *BB) {
29367fa27ce4SDimitry Andric if (auto *SI = dyn_cast<SelectInst>(&I)) {
29377fa27ce4SDimitry Andric auto *Cond = SI->getCondition();
29387fa27ce4SDimitry Andric // Do not unswitch vector selects and logical and/or selects
29397fa27ce4SDimitry Andric if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
29407fa27ce4SDimitry Andric AddUnswitchCandidatesForInst(SI, Cond);
29417fa27ce4SDimitry Andric } else if (CollectGuards && isGuard(&I)) {
2942e3b55780SDimitry Andric auto *Cond =
2943e3b55780SDimitry Andric skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
2944d8e91e46SDimitry Andric // TODO: Support AND, OR conditions and partial unswitching.
2945d8e91e46SDimitry Andric if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2946d8e91e46SDimitry Andric UnswitchCandidates.push_back({&I, {Cond}});
2947d8e91e46SDimitry Andric }
29487fa27ce4SDimitry Andric }
2949d8e91e46SDimitry Andric
2950eb11fae6SDimitry Andric if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2951eb11fae6SDimitry Andric // We can only consider fully loop-invariant switch conditions as we need
2952eb11fae6SDimitry Andric // to completely eliminate the switch after unswitching.
2953eb11fae6SDimitry Andric if (!isa<Constant>(SI->getCondition()) &&
2954e6d15924SDimitry Andric L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2955eb11fae6SDimitry Andric UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2956eb11fae6SDimitry Andric continue;
2957eb11fae6SDimitry Andric }
2958a303c417SDimitry Andric
2959eb11fae6SDimitry Andric auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
29607fa27ce4SDimitry Andric if (!BI || !BI->isConditional() ||
2961eb11fae6SDimitry Andric BI->getSuccessor(0) == BI->getSuccessor(1))
2962eb11fae6SDimitry Andric continue;
2963a303c417SDimitry Andric
29647fa27ce4SDimitry Andric AddUnswitchCandidatesForInst(BI, BI->getCondition());
2965344a3780SDimitry Andric }
2966344a3780SDimitry Andric
2967344a3780SDimitry Andric if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
2968344a3780SDimitry Andric !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2969e3b55780SDimitry Andric return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
2970344a3780SDimitry Andric })) {
2971344a3780SDimitry Andric MemorySSA *MSSA = MSSAU->getMemorySSA();
2972344a3780SDimitry Andric if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2973344a3780SDimitry Andric LLVM_DEBUG(
2974344a3780SDimitry Andric dbgs() << "simple-loop-unswitch: Found partially invariant condition "
2975344a3780SDimitry Andric << *Info->InstToDuplicate[0] << "\n");
2976344a3780SDimitry Andric PartialIVInfo = *Info;
2977344a3780SDimitry Andric PartialIVCondBranch = L.getHeader()->getTerminator();
2978344a3780SDimitry Andric TinyPtrVector<Value *> ValsToDuplicate;
2979145449b1SDimitry Andric llvm::append_range(ValsToDuplicate, Info->InstToDuplicate);
2980344a3780SDimitry Andric UnswitchCandidates.push_back(
2981344a3780SDimitry Andric {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2982344a3780SDimitry Andric }
2983eb11fae6SDimitry Andric }
2984e3b55780SDimitry Andric return !UnswitchCandidates.empty();
2985e3b55780SDimitry Andric }
2986044eb2f6SDimitry Andric
29877fa27ce4SDimitry Andric /// Tries to canonicalize condition described by:
29887fa27ce4SDimitry Andric ///
29897fa27ce4SDimitry Andric /// br (LHS pred RHS), label IfTrue, label IfFalse
29907fa27ce4SDimitry Andric ///
29917fa27ce4SDimitry Andric /// into its equivalent where `Pred` is something that we support for injected
29927fa27ce4SDimitry Andric /// invariants (so far it is limited to ult), LHS in canonicalized form is
29937fa27ce4SDimitry Andric /// non-invariant and RHS is an invariant.
canonicalizeForInvariantConditionInjection(ICmpInst::Predicate & Pred,Value * & LHS,Value * & RHS,BasicBlock * & IfTrue,BasicBlock * & IfFalse,const Loop & L)29947fa27ce4SDimitry Andric static void canonicalizeForInvariantConditionInjection(
29957fa27ce4SDimitry Andric ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
29967fa27ce4SDimitry Andric BasicBlock *&IfFalse, const Loop &L) {
29977fa27ce4SDimitry Andric if (!L.contains(IfTrue)) {
29987fa27ce4SDimitry Andric Pred = ICmpInst::getInversePredicate(Pred);
29997fa27ce4SDimitry Andric std::swap(IfTrue, IfFalse);
30007fa27ce4SDimitry Andric }
30017fa27ce4SDimitry Andric
30027fa27ce4SDimitry Andric // Move loop-invariant argument to RHS position.
30037fa27ce4SDimitry Andric if (L.isLoopInvariant(LHS)) {
30047fa27ce4SDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred);
30057fa27ce4SDimitry Andric std::swap(LHS, RHS);
30067fa27ce4SDimitry Andric }
30077fa27ce4SDimitry Andric
30087fa27ce4SDimitry Andric if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
30097fa27ce4SDimitry Andric // Turn "x >=s 0" into "x <u UMIN_INT"
30107fa27ce4SDimitry Andric Pred = ICmpInst::ICMP_ULT;
30117fa27ce4SDimitry Andric RHS = ConstantInt::get(
30127fa27ce4SDimitry Andric RHS->getContext(),
30137fa27ce4SDimitry Andric APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
30147fa27ce4SDimitry Andric }
30157fa27ce4SDimitry Andric }
30167fa27ce4SDimitry Andric
30177fa27ce4SDimitry Andric /// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
30187fa27ce4SDimitry Andric /// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
30197fa27ce4SDimitry Andric /// injecting a loop-invariant condition.
shouldTryInjectInvariantCondition(const ICmpInst::Predicate Pred,const Value * LHS,const Value * RHS,const BasicBlock * IfTrue,const BasicBlock * IfFalse,const Loop & L)30207fa27ce4SDimitry Andric static bool shouldTryInjectInvariantCondition(
30217fa27ce4SDimitry Andric const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
30227fa27ce4SDimitry Andric const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
30237fa27ce4SDimitry Andric if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
30247fa27ce4SDimitry Andric return false;
30257fa27ce4SDimitry Andric // TODO: Support other predicates.
30267fa27ce4SDimitry Andric if (Pred != ICmpInst::ICMP_ULT)
30277fa27ce4SDimitry Andric return false;
30287fa27ce4SDimitry Andric // TODO: Support non-loop-exiting branches?
30297fa27ce4SDimitry Andric if (!L.contains(IfTrue) || L.contains(IfFalse))
30307fa27ce4SDimitry Andric return false;
30317fa27ce4SDimitry Andric // FIXME: For some reason this causes problems with MSSA updates, need to
30327fa27ce4SDimitry Andric // investigate why. So far, just don't unswitch latch.
30337fa27ce4SDimitry Andric if (L.getHeader() == IfTrue)
30347fa27ce4SDimitry Andric return false;
30357fa27ce4SDimitry Andric return true;
30367fa27ce4SDimitry Andric }
30377fa27ce4SDimitry Andric
30387fa27ce4SDimitry Andric /// Returns true, if metadata on \p BI allows us to optimize branching into \p
30397fa27ce4SDimitry Andric /// TakenSucc via injection of invariant conditions. The branch should be not
30407fa27ce4SDimitry Andric /// enough and not previously unswitched, the information about this comes from
30417fa27ce4SDimitry Andric /// the metadata.
shouldTryInjectBasingOnMetadata(const BranchInst * BI,const BasicBlock * TakenSucc)30427fa27ce4SDimitry Andric bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
30437fa27ce4SDimitry Andric const BasicBlock *TakenSucc) {
30447fa27ce4SDimitry Andric SmallVector<uint32_t> Weights;
30457fa27ce4SDimitry Andric if (!extractBranchWeights(*BI, Weights))
30467fa27ce4SDimitry Andric return false;
30477fa27ce4SDimitry Andric unsigned T = InjectInvariantConditionHotnesThreshold;
30487fa27ce4SDimitry Andric BranchProbability LikelyTaken(T - 1, T);
30497fa27ce4SDimitry Andric
30507fa27ce4SDimitry Andric assert(Weights.size() == 2 && "Unexpected profile data!");
30517fa27ce4SDimitry Andric size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
30527fa27ce4SDimitry Andric auto Num = Weights[Idx];
30537fa27ce4SDimitry Andric auto Denom = Weights[0] + Weights[1];
30547fa27ce4SDimitry Andric // Degenerate or overflowed metadata.
30557fa27ce4SDimitry Andric if (Denom == 0 || Num > Denom)
30567fa27ce4SDimitry Andric return false;
30577fa27ce4SDimitry Andric BranchProbability ActualTaken(Num, Denom);
30587fa27ce4SDimitry Andric if (LikelyTaken > ActualTaken)
30597fa27ce4SDimitry Andric return false;
30607fa27ce4SDimitry Andric return true;
30617fa27ce4SDimitry Andric }
30627fa27ce4SDimitry Andric
30637fa27ce4SDimitry Andric /// Materialize pending invariant condition of the given candidate into IR. The
30647fa27ce4SDimitry Andric /// injected loop-invariant condition implies the original loop-variant branch
30657fa27ce4SDimitry Andric /// condition, so the materialization turns
30667fa27ce4SDimitry Andric ///
30677fa27ce4SDimitry Andric /// loop_block:
30687fa27ce4SDimitry Andric /// ...
30697fa27ce4SDimitry Andric /// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
30707fa27ce4SDimitry Andric ///
30717fa27ce4SDimitry Andric /// into
30727fa27ce4SDimitry Andric ///
30737fa27ce4SDimitry Andric /// preheader:
30747fa27ce4SDimitry Andric /// %invariant_cond = LHS pred RHS
30757fa27ce4SDimitry Andric /// ...
30767fa27ce4SDimitry Andric /// loop_block:
30777fa27ce4SDimitry Andric /// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
30787fa27ce4SDimitry Andric /// OriginalCheck:
30797fa27ce4SDimitry Andric /// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
30807fa27ce4SDimitry Andric /// ...
30817fa27ce4SDimitry Andric static NonTrivialUnswitchCandidate
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate,Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,MemorySSAUpdater * MSSAU)30827fa27ce4SDimitry Andric injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
30837fa27ce4SDimitry Andric DominatorTree &DT, LoopInfo &LI,
30847fa27ce4SDimitry Andric AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
30857fa27ce4SDimitry Andric assert(Candidate.hasPendingInjection() && "Nothing to inject!");
30867fa27ce4SDimitry Andric BasicBlock *Preheader = L.getLoopPreheader();
30877fa27ce4SDimitry Andric assert(Preheader && "Loop is not in simplified form?");
30887fa27ce4SDimitry Andric assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
30897fa27ce4SDimitry Andric "Unswitching branch of inner loop!");
30907fa27ce4SDimitry Andric
30917fa27ce4SDimitry Andric auto Pred = Candidate.PendingInjection->Pred;
30927fa27ce4SDimitry Andric auto *LHS = Candidate.PendingInjection->LHS;
30937fa27ce4SDimitry Andric auto *RHS = Candidate.PendingInjection->RHS;
30947fa27ce4SDimitry Andric auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
30957fa27ce4SDimitry Andric auto *TI = cast<BranchInst>(Candidate.TI);
30967fa27ce4SDimitry Andric auto *BB = Candidate.TI->getParent();
30977fa27ce4SDimitry Andric auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
30987fa27ce4SDimitry Andric : TI->getSuccessor(0);
30997fa27ce4SDimitry Andric // FIXME: Remove this once limitation on successors is lifted.
31007fa27ce4SDimitry Andric assert(L.contains(InLoopSucc) && "Not supported yet!");
31017fa27ce4SDimitry Andric assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
31027fa27ce4SDimitry Andric auto &Ctx = BB->getContext();
31037fa27ce4SDimitry Andric
31047fa27ce4SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator());
31057fa27ce4SDimitry Andric assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
31067fa27ce4SDimitry Andric if (LHS->getType() != RHS->getType()) {
31077fa27ce4SDimitry Andric if (LHS->getType()->getIntegerBitWidth() <
31087fa27ce4SDimitry Andric RHS->getType()->getIntegerBitWidth())
31097fa27ce4SDimitry Andric LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
31107fa27ce4SDimitry Andric else
31117fa27ce4SDimitry Andric RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
31127fa27ce4SDimitry Andric }
31137fa27ce4SDimitry Andric // Do not use builder here: CreateICmp may simplify this into a constant and
31147fa27ce4SDimitry Andric // unswitching will break. Better optimize it away later.
31157fa27ce4SDimitry Andric auto *InjectedCond =
31167fa27ce4SDimitry Andric ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
3117ac9a064cSDimitry Andric Preheader->getTerminator()->getIterator());
31187fa27ce4SDimitry Andric
31197fa27ce4SDimitry Andric BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
31207fa27ce4SDimitry Andric BB->getParent(), InLoopSucc);
31217fa27ce4SDimitry Andric Builder.SetInsertPoint(TI);
31227fa27ce4SDimitry Andric auto *InvariantBr =
31237fa27ce4SDimitry Andric Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
31247fa27ce4SDimitry Andric
31257fa27ce4SDimitry Andric Builder.SetInsertPoint(CheckBlock);
3126b1c73532SDimitry Andric Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
3127b1c73532SDimitry Andric TI->getSuccessor(1));
31287fa27ce4SDimitry Andric TI->eraseFromParent();
31297fa27ce4SDimitry Andric
31307fa27ce4SDimitry Andric // Fixup phis.
31317fa27ce4SDimitry Andric for (auto &I : *InLoopSucc) {
31327fa27ce4SDimitry Andric auto *PN = dyn_cast<PHINode>(&I);
31337fa27ce4SDimitry Andric if (!PN)
31347fa27ce4SDimitry Andric break;
31357fa27ce4SDimitry Andric auto *Inc = PN->getIncomingValueForBlock(BB);
31367fa27ce4SDimitry Andric PN->addIncoming(Inc, CheckBlock);
31377fa27ce4SDimitry Andric }
31387fa27ce4SDimitry Andric OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
31397fa27ce4SDimitry Andric
31407fa27ce4SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
31417fa27ce4SDimitry Andric { DominatorTree::Insert, BB, CheckBlock },
31427fa27ce4SDimitry Andric { DominatorTree::Insert, CheckBlock, InLoopSucc },
31437fa27ce4SDimitry Andric { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
31447fa27ce4SDimitry Andric { DominatorTree::Delete, BB, OutOfLoopSucc }
31457fa27ce4SDimitry Andric };
31467fa27ce4SDimitry Andric
31477fa27ce4SDimitry Andric DT.applyUpdates(DTUpdates);
31487fa27ce4SDimitry Andric if (MSSAU)
31497fa27ce4SDimitry Andric MSSAU->applyUpdates(DTUpdates, DT);
31507fa27ce4SDimitry Andric L.addBasicBlockToLoop(CheckBlock, LI);
31517fa27ce4SDimitry Andric
31527fa27ce4SDimitry Andric #ifndef NDEBUG
31537fa27ce4SDimitry Andric DT.verify();
31547fa27ce4SDimitry Andric LI.verify(DT);
31557fa27ce4SDimitry Andric if (MSSAU && VerifyMemorySSA)
31567fa27ce4SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA();
31577fa27ce4SDimitry Andric #endif
31587fa27ce4SDimitry Andric
31597fa27ce4SDimitry Andric // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
31607fa27ce4SDimitry Andric // higher because we have just inserted a new block. Need to think how to
31617fa27ce4SDimitry Andric // adjust the cost of injected candidates when it was first computed.
31627fa27ce4SDimitry Andric LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
31637fa27ce4SDimitry Andric << " and considering it for unswitching.");
31647fa27ce4SDimitry Andric ++NumInvariantConditionsInjected;
31657fa27ce4SDimitry Andric return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
31667fa27ce4SDimitry Andric Candidate.Cost);
31677fa27ce4SDimitry Andric }
31687fa27ce4SDimitry Andric
31697fa27ce4SDimitry Andric /// Given chain of loop branch conditions looking like:
31707fa27ce4SDimitry Andric /// br (Variant < Invariant1)
31717fa27ce4SDimitry Andric /// br (Variant < Invariant2)
31727fa27ce4SDimitry Andric /// br (Variant < Invariant3)
31737fa27ce4SDimitry Andric /// ...
31747fa27ce4SDimitry Andric /// collect set of invariant conditions on which we want to unswitch, which
31757fa27ce4SDimitry Andric /// look like:
31767fa27ce4SDimitry Andric /// Invariant1 <= Invariant2
31777fa27ce4SDimitry Andric /// Invariant2 <= Invariant3
31787fa27ce4SDimitry Andric /// ...
31797fa27ce4SDimitry Andric /// Though they might not immediately exist in the IR, we can still inject them.
insertCandidatesWithPendingInjections(SmallVectorImpl<NonTrivialUnswitchCandidate> & UnswitchCandidates,Loop & L,ICmpInst::Predicate Pred,ArrayRef<CompareDesc> Compares,const DominatorTree & DT)31807fa27ce4SDimitry Andric static bool insertCandidatesWithPendingInjections(
31817fa27ce4SDimitry Andric SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
31827fa27ce4SDimitry Andric ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
31837fa27ce4SDimitry Andric const DominatorTree &DT) {
31847fa27ce4SDimitry Andric
31857fa27ce4SDimitry Andric assert(ICmpInst::isRelational(Pred));
31867fa27ce4SDimitry Andric assert(ICmpInst::isStrictPredicate(Pred));
31877fa27ce4SDimitry Andric if (Compares.size() < 2)
31887fa27ce4SDimitry Andric return false;
31897fa27ce4SDimitry Andric ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
31907fa27ce4SDimitry Andric for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
31917fa27ce4SDimitry Andric Next != Compares.end(); ++Prev, ++Next) {
31927fa27ce4SDimitry Andric Value *LHS = Next->Invariant;
31937fa27ce4SDimitry Andric Value *RHS = Prev->Invariant;
31947fa27ce4SDimitry Andric BasicBlock *InLoopSucc = Prev->InLoopSucc;
31957fa27ce4SDimitry Andric InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
31967fa27ce4SDimitry Andric NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
31977fa27ce4SDimitry Andric std::nullopt, std::move(ToInject));
31987fa27ce4SDimitry Andric UnswitchCandidates.push_back(std::move(Candidate));
31997fa27ce4SDimitry Andric }
32007fa27ce4SDimitry Andric return true;
32017fa27ce4SDimitry Andric }
32027fa27ce4SDimitry Andric
32037fa27ce4SDimitry Andric /// Collect unswitch candidates by invariant conditions that are not immediately
32047fa27ce4SDimitry Andric /// present in the loop. However, they can be injected into the code if we
32057fa27ce4SDimitry Andric /// decide it's profitable.
32067fa27ce4SDimitry Andric /// An example of such conditions is following:
32077fa27ce4SDimitry Andric ///
32087fa27ce4SDimitry Andric /// for (...) {
32097fa27ce4SDimitry Andric /// x = load ...
32107fa27ce4SDimitry Andric /// if (! x <u C1) break;
32117fa27ce4SDimitry Andric /// if (! x <u C2) break;
32127fa27ce4SDimitry Andric /// <do something>
32137fa27ce4SDimitry Andric /// }
32147fa27ce4SDimitry Andric ///
32157fa27ce4SDimitry Andric /// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
32167fa27ce4SDimitry Andric /// C2" automatically implies "x <u C2", so we can get rid of one of
32177fa27ce4SDimitry Andric /// loop-variant checks in unswitched loop version.
collectUnswitchCandidatesWithInjections(SmallVectorImpl<NonTrivialUnswitchCandidate> & UnswitchCandidates,IVConditionInfo & PartialIVInfo,Instruction * & PartialIVCondBranch,Loop & L,const DominatorTree & DT,const LoopInfo & LI,AAResults & AA,const MemorySSAUpdater * MSSAU)32187fa27ce4SDimitry Andric static bool collectUnswitchCandidatesWithInjections(
32197fa27ce4SDimitry Andric SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
32207fa27ce4SDimitry Andric IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
32217fa27ce4SDimitry Andric const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
32227fa27ce4SDimitry Andric const MemorySSAUpdater *MSSAU) {
32237fa27ce4SDimitry Andric if (!InjectInvariantConditions)
32247fa27ce4SDimitry Andric return false;
32257fa27ce4SDimitry Andric
32267fa27ce4SDimitry Andric if (!DT.isReachableFromEntry(L.getHeader()))
32277fa27ce4SDimitry Andric return false;
32287fa27ce4SDimitry Andric auto *Latch = L.getLoopLatch();
32297fa27ce4SDimitry Andric // Need to have a single latch and a preheader.
32307fa27ce4SDimitry Andric if (!Latch)
32317fa27ce4SDimitry Andric return false;
32327fa27ce4SDimitry Andric assert(L.getLoopPreheader() && "Must have a preheader!");
32337fa27ce4SDimitry Andric
32347fa27ce4SDimitry Andric DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;
32357fa27ce4SDimitry Andric // Traverse the conditions that dominate latch (and therefore dominate each
32367fa27ce4SDimitry Andric // other).
32377fa27ce4SDimitry Andric for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
32387fa27ce4SDimitry Andric DTN = DTN->getIDom()) {
32397fa27ce4SDimitry Andric ICmpInst::Predicate Pred;
32407fa27ce4SDimitry Andric Value *LHS = nullptr, *RHS = nullptr;
32417fa27ce4SDimitry Andric BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
32427fa27ce4SDimitry Andric auto *BB = DTN->getBlock();
32437fa27ce4SDimitry Andric // Ignore inner loops.
32447fa27ce4SDimitry Andric if (LI.getLoopFor(BB) != &L)
32457fa27ce4SDimitry Andric continue;
32467fa27ce4SDimitry Andric auto *Term = BB->getTerminator();
32477fa27ce4SDimitry Andric if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
32487fa27ce4SDimitry Andric m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
32497fa27ce4SDimitry Andric continue;
32507fa27ce4SDimitry Andric if (!LHS->getType()->isIntegerTy())
32517fa27ce4SDimitry Andric continue;
32527fa27ce4SDimitry Andric canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
32537fa27ce4SDimitry Andric L);
32547fa27ce4SDimitry Andric if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
32557fa27ce4SDimitry Andric continue;
32567fa27ce4SDimitry Andric if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
32577fa27ce4SDimitry Andric continue;
32587fa27ce4SDimitry Andric // Strip ZEXT for unsigned predicate.
32597fa27ce4SDimitry Andric // TODO: once signed predicates are supported, also strip SEXT.
32607fa27ce4SDimitry Andric CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
32617fa27ce4SDimitry Andric while (auto *Zext = dyn_cast<ZExtInst>(LHS))
32627fa27ce4SDimitry Andric LHS = Zext->getOperand(0);
32637fa27ce4SDimitry Andric CandidatesULT[LHS].push_back(Desc);
32647fa27ce4SDimitry Andric }
32657fa27ce4SDimitry Andric
32667fa27ce4SDimitry Andric bool Found = false;
32677fa27ce4SDimitry Andric for (auto &It : CandidatesULT)
32687fa27ce4SDimitry Andric Found |= insertCandidatesWithPendingInjections(
32697fa27ce4SDimitry Andric UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
32707fa27ce4SDimitry Andric return Found;
32717fa27ce4SDimitry Andric }
32727fa27ce4SDimitry Andric
isSafeForNoNTrivialUnswitching(Loop & L,LoopInfo & LI)3273e3b55780SDimitry Andric static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
3274e3b55780SDimitry Andric if (!L.isSafeToClone())
3275eb11fae6SDimitry Andric return false;
3276e3b55780SDimitry Andric for (auto *BB : L.blocks())
3277e3b55780SDimitry Andric for (auto &I : *BB) {
3278e3b55780SDimitry Andric if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
3279e3b55780SDimitry Andric return false;
3280e3b55780SDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) {
3281e3b55780SDimitry Andric assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
3282e3b55780SDimitry Andric if (CB->isConvergent())
3283e3b55780SDimitry Andric return false;
3284e3b55780SDimitry Andric }
3285e3b55780SDimitry Andric }
3286044eb2f6SDimitry Andric
3287eb11fae6SDimitry Andric // Check if there are irreducible CFG cycles in this loop. If so, we cannot
3288eb11fae6SDimitry Andric // easily unswitch non-trivial edges out of the loop. Doing so might turn the
3289eb11fae6SDimitry Andric // irreducible control flow into reducible control flow and introduce new
3290eb11fae6SDimitry Andric // loops "out of thin air". If we ever discover important use cases for doing
3291eb11fae6SDimitry Andric // this, we can add support to loop unswitch, but it is a lot of complexity
3292eb11fae6SDimitry Andric // for what seems little or no real world benefit.
3293eb11fae6SDimitry Andric LoopBlocksRPO RPOT(&L);
3294eb11fae6SDimitry Andric RPOT.perform(&LI);
3295eb11fae6SDimitry Andric if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
3296eb11fae6SDimitry Andric return false;
3297eb11fae6SDimitry Andric
3298d8e91e46SDimitry Andric SmallVector<BasicBlock *, 4> ExitBlocks;
3299d8e91e46SDimitry Andric L.getUniqueExitBlocks(ExitBlocks);
3300344a3780SDimitry Andric // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
3301344a3780SDimitry Andric // instruction as we don't know how to split those exit blocks.
3302d8e91e46SDimitry Andric // FIXME: We should teach SplitBlock to handle this and remove this
3303d8e91e46SDimitry Andric // restriction.
3304344a3780SDimitry Andric for (auto *ExitBB : ExitBlocks) {
3305344a3780SDimitry Andric auto *I = ExitBB->getFirstNonPHI();
3306344a3780SDimitry Andric if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
3307344a3780SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "
3308344a3780SDimitry Andric "in exit block\n");
3309d8e91e46SDimitry Andric return false;
3310d8e91e46SDimitry Andric }
3311344a3780SDimitry Andric }
3312d8e91e46SDimitry Andric
3313e3b55780SDimitry Andric return true;
3314e3b55780SDimitry Andric }
3315044eb2f6SDimitry Andric
findBestNonTrivialUnswitchCandidate(ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates,const Loop & L,const DominatorTree & DT,const LoopInfo & LI,AssumptionCache & AC,const TargetTransformInfo & TTI,const IVConditionInfo & PartialIVInfo)3316e3b55780SDimitry Andric static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
3317e3b55780SDimitry Andric ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
3318e3b55780SDimitry Andric const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
3319e3b55780SDimitry Andric const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
3320044eb2f6SDimitry Andric // Given that unswitching these terminators will require duplicating parts of
3321044eb2f6SDimitry Andric // the loop, so we need to be able to model that cost. Compute the ephemeral
3322044eb2f6SDimitry Andric // values and set up a data structure to hold per-BB costs. We cache each
3323044eb2f6SDimitry Andric // block's cost so that we don't recompute this when considering different
3324044eb2f6SDimitry Andric // subsets of the loop for duplication during unswitching.
3325044eb2f6SDimitry Andric SmallPtrSet<const Value *, 4> EphValues;
3326044eb2f6SDimitry Andric CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
3327344a3780SDimitry Andric SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
3328044eb2f6SDimitry Andric
3329044eb2f6SDimitry Andric // Compute the cost of each block, as well as the total loop cost. Also, bail
3330044eb2f6SDimitry Andric // out if we see instructions which are incompatible with loop unswitching
3331044eb2f6SDimitry Andric // (convergent, noduplicate, or cross-basic-block tokens).
3332044eb2f6SDimitry Andric // FIXME: We might be able to safely handle some of these in non-duplicated
3333044eb2f6SDimitry Andric // regions.
3334b60736ecSDimitry Andric TargetTransformInfo::TargetCostKind CostKind =
3335b60736ecSDimitry Andric L.getHeader()->getParent()->hasMinSize()
3336b60736ecSDimitry Andric ? TargetTransformInfo::TCK_CodeSize
3337b60736ecSDimitry Andric : TargetTransformInfo::TCK_SizeAndLatency;
3338344a3780SDimitry Andric InstructionCost LoopCost = 0;
3339044eb2f6SDimitry Andric for (auto *BB : L.blocks()) {
3340344a3780SDimitry Andric InstructionCost Cost = 0;
3341044eb2f6SDimitry Andric for (auto &I : *BB) {
3342044eb2f6SDimitry Andric if (EphValues.count(&I))
3343044eb2f6SDimitry Andric continue;
3344e3b55780SDimitry Andric Cost += TTI.getInstructionCost(&I, CostKind);
3345044eb2f6SDimitry Andric }
3346044eb2f6SDimitry Andric assert(Cost >= 0 && "Must not have negative costs!");
3347044eb2f6SDimitry Andric LoopCost += Cost;
3348044eb2f6SDimitry Andric assert(LoopCost >= 0 && "Must not have negative loop costs!");
3349044eb2f6SDimitry Andric BBCostMap[BB] = Cost;
3350044eb2f6SDimitry Andric }
3351eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n");
3352044eb2f6SDimitry Andric
3353044eb2f6SDimitry Andric // Now we find the best candidate by searching for the one with the following
3354044eb2f6SDimitry Andric // properties in order:
3355044eb2f6SDimitry Andric //
3356044eb2f6SDimitry Andric // 1) An unswitching cost below the threshold
3357044eb2f6SDimitry Andric // 2) The smallest number of duplicated unswitch candidates (to avoid
3358044eb2f6SDimitry Andric // creating redundant subsequent unswitching)
3359044eb2f6SDimitry Andric // 3) The smallest cost after unswitching.
3360044eb2f6SDimitry Andric //
3361044eb2f6SDimitry Andric // We prioritize reducing fanout of unswitch candidates provided the cost
3362044eb2f6SDimitry Andric // remains below the threshold because this has a multiplicative effect.
3363044eb2f6SDimitry Andric //
3364044eb2f6SDimitry Andric // This requires memoizing each dominator subtree to avoid redundant work.
3365044eb2f6SDimitry Andric //
3366044eb2f6SDimitry Andric // FIXME: Need to actually do the number of candidates part above.
3367344a3780SDimitry Andric SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
3368044eb2f6SDimitry Andric // Given a terminator which might be unswitched, computes the non-duplicated
3369044eb2f6SDimitry Andric // cost for that terminator.
3370344a3780SDimitry Andric auto ComputeUnswitchedCost = [&](Instruction &TI,
3371344a3780SDimitry Andric bool FullUnswitch) -> InstructionCost {
33727fa27ce4SDimitry Andric // Unswitching selects unswitches the entire loop.
33737fa27ce4SDimitry Andric if (isa<SelectInst>(TI))
33747fa27ce4SDimitry Andric return LoopCost;
33757fa27ce4SDimitry Andric
3376eb11fae6SDimitry Andric BasicBlock &BB = *TI.getParent();
3377044eb2f6SDimitry Andric SmallPtrSet<BasicBlock *, 4> Visited;
3378044eb2f6SDimitry Andric
3379344a3780SDimitry Andric InstructionCost Cost = 0;
3380044eb2f6SDimitry Andric for (BasicBlock *SuccBB : successors(&BB)) {
3381044eb2f6SDimitry Andric // Don't count successors more than once.
3382044eb2f6SDimitry Andric if (!Visited.insert(SuccBB).second)
3383044eb2f6SDimitry Andric continue;
3384044eb2f6SDimitry Andric
3385eb11fae6SDimitry Andric // If this is a partial unswitch candidate, then it must be a conditional
3386344a3780SDimitry Andric // branch with a condition of either `or`, `and`, their corresponding
3387344a3780SDimitry Andric // select forms or partially invariant instructions. In that case, one of
3388eb11fae6SDimitry Andric // the successors is necessarily duplicated, so don't even try to remove
3389eb11fae6SDimitry Andric // its cost.
3390eb11fae6SDimitry Andric if (!FullUnswitch) {
3391eb11fae6SDimitry Andric auto &BI = cast<BranchInst>(TI);
3392145449b1SDimitry Andric Value *Cond = skipTrivialSelect(BI.getCondition());
3393145449b1SDimitry Andric if (match(Cond, m_LogicalAnd())) {
3394eb11fae6SDimitry Andric if (SuccBB == BI.getSuccessor(1))
3395eb11fae6SDimitry Andric continue;
3396145449b1SDimitry Andric } else if (match(Cond, m_LogicalOr())) {
3397eb11fae6SDimitry Andric if (SuccBB == BI.getSuccessor(0))
3398eb11fae6SDimitry Andric continue;
3399344a3780SDimitry Andric } else if ((PartialIVInfo.KnownValue->isOneValue() &&
3400344a3780SDimitry Andric SuccBB == BI.getSuccessor(0)) ||
3401344a3780SDimitry Andric (!PartialIVInfo.KnownValue->isOneValue() &&
3402344a3780SDimitry Andric SuccBB == BI.getSuccessor(1)))
3403344a3780SDimitry Andric continue;
3404eb11fae6SDimitry Andric }
3405eb11fae6SDimitry Andric
3406044eb2f6SDimitry Andric // This successor's domtree will not need to be duplicated after
3407044eb2f6SDimitry Andric // unswitching if the edge to the successor dominates it (and thus the
3408044eb2f6SDimitry Andric // entire tree). This essentially means there is no other path into this
3409044eb2f6SDimitry Andric // subtree and so it will end up live in only one clone of the loop.
3410044eb2f6SDimitry Andric if (SuccBB->getUniquePredecessor() ||
3411044eb2f6SDimitry Andric llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
3412044eb2f6SDimitry Andric return PredBB == &BB || DT.dominates(SuccBB, PredBB);
3413044eb2f6SDimitry Andric })) {
3414344a3780SDimitry Andric Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
3415344a3780SDimitry Andric assert(Cost <= LoopCost &&
3416044eb2f6SDimitry Andric "Non-duplicated cost should never exceed total loop cost!");
3417044eb2f6SDimitry Andric }
3418044eb2f6SDimitry Andric }
3419044eb2f6SDimitry Andric
3420044eb2f6SDimitry Andric // Now scale the cost by the number of unique successors minus one. We
3421044eb2f6SDimitry Andric // subtract one because there is already at least one copy of the entire
3422044eb2f6SDimitry Andric // loop. This is computing the new cost of unswitching a condition.
3423d8e91e46SDimitry Andric // Note that guards always have 2 unique successors that are implicit and
3424d8e91e46SDimitry Andric // will be materialized if we decide to unswitch it.
3425d8e91e46SDimitry Andric int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
3426d8e91e46SDimitry Andric assert(SuccessorsCount > 1 &&
3427044eb2f6SDimitry Andric "Cannot unswitch a condition without multiple distinct successors!");
3428344a3780SDimitry Andric return (LoopCost - Cost) * (SuccessorsCount - 1);
3429044eb2f6SDimitry Andric };
3430e3b55780SDimitry Andric
3431e3b55780SDimitry Andric std::optional<NonTrivialUnswitchCandidate> Best;
3432e3b55780SDimitry Andric for (auto &Candidate : UnswitchCandidates) {
3433e3b55780SDimitry Andric Instruction &TI = *Candidate.TI;
3434e3b55780SDimitry Andric ArrayRef<Value *> Invariants = Candidate.Invariants;
3435eb11fae6SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(&TI);
34367fa27ce4SDimitry Andric bool FullUnswitch =
34377fa27ce4SDimitry Andric !BI || Candidate.hasPendingInjection() ||
3438145449b1SDimitry Andric (Invariants.size() == 1 &&
34397fa27ce4SDimitry Andric Invariants[0] == skipTrivialSelect(BI->getCondition()));
34407fa27ce4SDimitry Andric InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
3441d8e91e46SDimitry Andric // Calculate cost multiplier which is a tool to limit potentially
3442d8e91e46SDimitry Andric // exponential behavior of loop-unswitch.
3443d8e91e46SDimitry Andric if (EnableUnswitchCostMultiplier) {
3444d8e91e46SDimitry Andric int CostMultiplier =
3445cfca06d7SDimitry Andric CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
3446d8e91e46SDimitry Andric assert(
3447d8e91e46SDimitry Andric (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&
3448d8e91e46SDimitry Andric "cost multiplier needs to be in the range of 1..UnswitchThreshold");
3449d8e91e46SDimitry Andric CandidateCost *= CostMultiplier;
3450d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3451d8e91e46SDimitry Andric << " (multiplier: " << CostMultiplier << ")"
3452d8e91e46SDimitry Andric << " for unswitch candidate: " << TI << "\n");
3453d8e91e46SDimitry Andric } else {
3454eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
3455eb11fae6SDimitry Andric << " for unswitch candidate: " << TI << "\n");
3456d8e91e46SDimitry Andric }
3457d8e91e46SDimitry Andric
3458e3b55780SDimitry Andric if (!Best || CandidateCost < Best->Cost) {
3459e3b55780SDimitry Andric Best = Candidate;
3460e3b55780SDimitry Andric Best->Cost = CandidateCost;
3461044eb2f6SDimitry Andric }
3462044eb2f6SDimitry Andric }
3463e3b55780SDimitry Andric assert(Best && "Must be!");
3464e3b55780SDimitry Andric return *Best;
3465e3b55780SDimitry Andric }
3466044eb2f6SDimitry Andric
34677fa27ce4SDimitry Andric // Insert a freeze on an unswitched branch if all is true:
34687fa27ce4SDimitry Andric // 1. freeze-loop-unswitch-cond option is true
34697fa27ce4SDimitry Andric // 2. The branch may not execute in the loop pre-transformation. If a branch may
34707fa27ce4SDimitry Andric // not execute and could cause UB, it would always cause UB if it is hoisted outside
34717fa27ce4SDimitry Andric // of the loop. Insert a freeze to prevent this case.
34727fa27ce4SDimitry Andric // 3. The branch condition may be poison or undef
shouldInsertFreeze(Loop & L,Instruction & TI,DominatorTree & DT,AssumptionCache & AC)34737fa27ce4SDimitry Andric static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
34747fa27ce4SDimitry Andric AssumptionCache &AC) {
34757fa27ce4SDimitry Andric assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
34767fa27ce4SDimitry Andric if (!FreezeLoopUnswitchCond)
34777fa27ce4SDimitry Andric return false;
34787fa27ce4SDimitry Andric
34797fa27ce4SDimitry Andric ICFLoopSafetyInfo SafetyInfo;
34807fa27ce4SDimitry Andric SafetyInfo.computeLoopSafetyInfo(&L);
34817fa27ce4SDimitry Andric if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
34827fa27ce4SDimitry Andric return false;
34837fa27ce4SDimitry Andric
34847fa27ce4SDimitry Andric Value *Cond;
34857fa27ce4SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
34867fa27ce4SDimitry Andric Cond = skipTrivialSelect(BI->getCondition());
34877fa27ce4SDimitry Andric else
34887fa27ce4SDimitry Andric Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
34897fa27ce4SDimitry Andric return !isGuaranteedNotToBeUndefOrPoison(
34907fa27ce4SDimitry Andric Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
34917fa27ce4SDimitry Andric }
34927fa27ce4SDimitry Andric
unswitchBestCondition(Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,AAResults & AA,TargetTransformInfo & TTI,ScalarEvolution * SE,MemorySSAUpdater * MSSAU,LPMUpdater & LoopUpdater)3493b1c73532SDimitry Andric static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
3494b1c73532SDimitry Andric AssumptionCache &AC, AAResults &AA,
3495b1c73532SDimitry Andric TargetTransformInfo &TTI, ScalarEvolution *SE,
3496b1c73532SDimitry Andric MemorySSAUpdater *MSSAU,
3497b1c73532SDimitry Andric LPMUpdater &LoopUpdater) {
3498e3b55780SDimitry Andric // Collect all invariant conditions within this loop (as opposed to an inner
3499e3b55780SDimitry Andric // loop which would be handled when visiting that inner loop).
3500e3b55780SDimitry Andric SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
3501e3b55780SDimitry Andric IVConditionInfo PartialIVInfo;
3502e3b55780SDimitry Andric Instruction *PartialIVCondBranch = nullptr;
35037fa27ce4SDimitry Andric collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
35047fa27ce4SDimitry Andric PartialIVCondBranch, L, LI, AA, MSSAU);
3505b1c73532SDimitry Andric if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable"))
35067fa27ce4SDimitry Andric collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
35077fa27ce4SDimitry Andric PartialIVCondBranch, L, DT, LI, AA,
35087fa27ce4SDimitry Andric MSSAU);
3509e3b55780SDimitry Andric // If we didn't find any candidates, we're done.
35107fa27ce4SDimitry Andric if (UnswitchCandidates.empty())
3511e3b55780SDimitry Andric return false;
3512e3b55780SDimitry Andric
3513e3b55780SDimitry Andric LLVM_DEBUG(
3514e3b55780SDimitry Andric dbgs() << "Considering " << UnswitchCandidates.size()
3515e3b55780SDimitry Andric << " non-trivial loop invariant conditions for unswitching.\n");
3516e3b55780SDimitry Andric
3517e3b55780SDimitry Andric NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
3518e3b55780SDimitry Andric UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
3519e3b55780SDimitry Andric
3520e3b55780SDimitry Andric assert(Best.TI && "Failed to find loop unswitch candidate");
3521e3b55780SDimitry Andric assert(Best.Cost && "Failed to compute cost");
3522e3b55780SDimitry Andric
3523e3b55780SDimitry Andric if (*Best.Cost >= UnswitchThreshold) {
3524e3b55780SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
3525e3b55780SDimitry Andric << "\n");
3526eb11fae6SDimitry Andric return false;
3527044eb2f6SDimitry Andric }
3528a303c417SDimitry Andric
3529b1c73532SDimitry Andric bool InjectedCondition = false;
3530b1c73532SDimitry Andric if (Best.hasPendingInjection()) {
35317fa27ce4SDimitry Andric Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
3532b1c73532SDimitry Andric InjectedCondition = true;
3533b1c73532SDimitry Andric }
35347fa27ce4SDimitry Andric assert(!Best.hasPendingInjection() &&
35357fa27ce4SDimitry Andric "All injections should have been done by now!");
35367fa27ce4SDimitry Andric
3537e3b55780SDimitry Andric if (Best.TI != PartialIVCondBranch)
3538344a3780SDimitry Andric PartialIVInfo.InstToDuplicate.clear();
3539344a3780SDimitry Andric
35407fa27ce4SDimitry Andric bool InsertFreeze;
35417fa27ce4SDimitry Andric if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
35427fa27ce4SDimitry Andric // If the best candidate is a select, turn it into a branch. Select
35437fa27ce4SDimitry Andric // instructions with a poison conditional do not propagate poison, but
35447fa27ce4SDimitry Andric // branching on poison causes UB. Insert a freeze on the select
35457fa27ce4SDimitry Andric // conditional to prevent UB after turning the select into a branch.
35467fa27ce4SDimitry Andric InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
35477fa27ce4SDimitry Andric SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
35487fa27ce4SDimitry Andric Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
35497fa27ce4SDimitry Andric } else {
3550d8e91e46SDimitry Andric // If the best candidate is a guard, turn it into a branch.
3551e3b55780SDimitry Andric if (isGuard(Best.TI))
3552e3b55780SDimitry Andric Best.TI =
3553e3b55780SDimitry Andric turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
35547fa27ce4SDimitry Andric InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
35557fa27ce4SDimitry Andric }
3556d8e91e46SDimitry Andric
3557e3b55780SDimitry Andric LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
3558e3b55780SDimitry Andric << ") terminator: " << *Best.TI << "\n");
3559e3b55780SDimitry Andric unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
3560b1c73532SDimitry Andric LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze,
3561b1c73532SDimitry Andric InjectedCondition);
3562d8e91e46SDimitry Andric return true;
3563eb11fae6SDimitry Andric }
3564eb11fae6SDimitry Andric
3565eb11fae6SDimitry Andric /// Unswitch control flow predicated on loop invariant conditions.
3566eb11fae6SDimitry Andric ///
3567eb11fae6SDimitry Andric /// This first hoists all branches or switches which are trivial (IE, do not
3568eb11fae6SDimitry Andric /// require duplicating any part of the loop) out of the loop body. It then
3569eb11fae6SDimitry Andric /// looks at other loop invariant control flows and tries to unswitch those as
3570eb11fae6SDimitry Andric /// well by cloning the loop if the result is small enough.
3571eb11fae6SDimitry Andric ///
3572344a3780SDimitry Andric /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
3573344a3780SDimitry Andric /// also updated based on the unswitch. The `MSSA` analysis is also updated if
3574344a3780SDimitry Andric /// valid (i.e. its use is enabled).
3575eb11fae6SDimitry Andric ///
3576eb11fae6SDimitry Andric /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
3577eb11fae6SDimitry Andric /// true, we will attempt to do non-trivial unswitching as well as trivial
3578eb11fae6SDimitry Andric /// unswitching.
3579eb11fae6SDimitry Andric ///
3580b1c73532SDimitry Andric /// The `postUnswitch` function will be run after unswitching is complete
3581b1c73532SDimitry Andric /// with information on whether or not the provided loop remains a loop and
3582b1c73532SDimitry Andric /// a list of new sibling loops created.
3583eb11fae6SDimitry Andric ///
3584eb11fae6SDimitry Andric /// If `SE` is non-null, we will update that analysis based on the unswitching
3585eb11fae6SDimitry Andric /// done.
unswitchLoop(Loop & L,DominatorTree & DT,LoopInfo & LI,AssumptionCache & AC,AAResults & AA,TargetTransformInfo & TTI,bool Trivial,bool NonTrivial,ScalarEvolution * SE,MemorySSAUpdater * MSSAU,ProfileSummaryInfo * PSI,BlockFrequencyInfo * BFI,LPMUpdater & LoopUpdater)3586b1c73532SDimitry Andric static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
3587b1c73532SDimitry Andric AssumptionCache &AC, AAResults &AA,
3588b1c73532SDimitry Andric TargetTransformInfo &TTI, bool Trivial,
3589b1c73532SDimitry Andric bool NonTrivial, ScalarEvolution *SE,
3590b1c73532SDimitry Andric MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
3591b1c73532SDimitry Andric BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
3592eb11fae6SDimitry Andric assert(L.isRecursivelyLCSSAForm(DT, LI) &&
3593eb11fae6SDimitry Andric "Loops must be in LCSSA form before unswitching.");
3594eb11fae6SDimitry Andric
3595eb11fae6SDimitry Andric // Must be in loop simplified form: we need a preheader and dedicated exits.
3596eb11fae6SDimitry Andric if (!L.isLoopSimplifyForm())
3597eb11fae6SDimitry Andric return false;
3598eb11fae6SDimitry Andric
3599eb11fae6SDimitry Andric // Try trivial unswitch first before loop over other basic blocks in the loop.
3600344a3780SDimitry Andric if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3601eb11fae6SDimitry Andric // If we unswitched successfully we will want to clean up the loop before
3602eb11fae6SDimitry Andric // processing it further so just mark it as unswitched and return.
3603b1c73532SDimitry Andric postUnswitch(L, LoopUpdater, L.getName(),
3604b1c73532SDimitry Andric /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false,
3605b1c73532SDimitry Andric /*InjectedCondition*/ false, {});
3606eb11fae6SDimitry Andric return true;
3607eb11fae6SDimitry Andric }
3608eb11fae6SDimitry Andric
36097fa27ce4SDimitry Andric const Function *F = L.getHeader()->getParent();
36107fa27ce4SDimitry Andric
3611344a3780SDimitry Andric // Check whether we should continue with non-trivial conditions.
3612344a3780SDimitry Andric // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3613344a3780SDimitry Andric // unswitching for testing and debugging.
3614344a3780SDimitry Andric // NonTrivial: Parameter that enables non-trivial unswitching for this
3615344a3780SDimitry Andric // invocation of the transform. But this should be allowed only
3616344a3780SDimitry Andric // for targets without branch divergence.
3617344a3780SDimitry Andric //
3618344a3780SDimitry Andric // FIXME: If divergence analysis becomes available to a loop
3619344a3780SDimitry Andric // transform, we should allow unswitching for non-trivial uniform
3620344a3780SDimitry Andric // branches even on targets that have divergence.
3621344a3780SDimitry Andric // https://bugs.llvm.org/show_bug.cgi?id=48819
3622344a3780SDimitry Andric bool ContinueWithNonTrivial =
36237fa27ce4SDimitry Andric EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
3624344a3780SDimitry Andric if (!ContinueWithNonTrivial)
3625eb11fae6SDimitry Andric return false;
3626eb11fae6SDimitry Andric
3627b60736ecSDimitry Andric // Skip non-trivial unswitching for optsize functions.
36287fa27ce4SDimitry Andric if (F->hasOptSize())
3629b60736ecSDimitry Andric return false;
3630b60736ecSDimitry Andric
36317fa27ce4SDimitry Andric // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
36327fa27ce4SDimitry Andric // of the loops L is nested in, and of the loops nested in L are all cold.
36337fa27ce4SDimitry Andric auto IsLoopNestCold = [&](const Loop *L) {
36347fa27ce4SDimitry Andric // Check L and all of its parent loops.
36357fa27ce4SDimitry Andric auto *Parent = L;
36367fa27ce4SDimitry Andric while (Parent) {
36377fa27ce4SDimitry Andric if (!PSI->isColdBlock(Parent->getHeader(), BFI))
36387fa27ce4SDimitry Andric return false;
36397fa27ce4SDimitry Andric Parent = Parent->getParentLoop();
36407fa27ce4SDimitry Andric }
36417fa27ce4SDimitry Andric // Next check all loops nested within L.
36427fa27ce4SDimitry Andric SmallVector<const Loop *, 4> Worklist;
36437fa27ce4SDimitry Andric Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
36447fa27ce4SDimitry Andric L->getSubLoops().end());
36457fa27ce4SDimitry Andric while (!Worklist.empty()) {
36467fa27ce4SDimitry Andric auto *CurLoop = Worklist.pop_back_val();
36477fa27ce4SDimitry Andric if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
36487fa27ce4SDimitry Andric return false;
36497fa27ce4SDimitry Andric Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
36507fa27ce4SDimitry Andric CurLoop->getSubLoops().end());
36517fa27ce4SDimitry Andric }
36527fa27ce4SDimitry Andric return true;
36537fa27ce4SDimitry Andric };
36547fa27ce4SDimitry Andric
36557fa27ce4SDimitry Andric // Skip cold loops in cold loop nests, as unswitching them brings little
36567fa27ce4SDimitry Andric // benefit but increases the code size
36577fa27ce4SDimitry Andric if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
3658e3b55780SDimitry Andric LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3659e3b55780SDimitry Andric return false;
3660e3b55780SDimitry Andric }
3661e3b55780SDimitry Andric
3662e3b55780SDimitry Andric // Perform legality checks.
3663e3b55780SDimitry Andric if (!isSafeForNoNTrivialUnswitching(L, LI))
3664344a3780SDimitry Andric return false;
3665344a3780SDimitry Andric
3666eb11fae6SDimitry Andric // For non-trivial unswitching, because it often creates new loops, we rely on
3667eb11fae6SDimitry Andric // the pass manager to iterate on the loops rather than trying to immediately
3668eb11fae6SDimitry Andric // reach a fixed point. There is no substantial advantage to iterating
3669eb11fae6SDimitry Andric // internally, and if any of the new loops are simplified enough to contain
3670eb11fae6SDimitry Andric // trivial unswitching we want to prefer those.
3671eb11fae6SDimitry Andric
3672eb11fae6SDimitry Andric // Try to unswitch the best invariant condition. We prefer this full unswitch to
3673eb11fae6SDimitry Andric // a partial unswitch when possible below the threshold.
3674b1c73532SDimitry Andric if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater))
3675eb11fae6SDimitry Andric return true;
3676eb11fae6SDimitry Andric
3677eb11fae6SDimitry Andric // No other opportunities to unswitch.
3678b60736ecSDimitry Andric return false;
3679a303c417SDimitry Andric }
3680a303c417SDimitry Andric
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)3681a303c417SDimitry Andric PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3682a303c417SDimitry Andric LoopStandardAnalysisResults &AR,
3683a303c417SDimitry Andric LPMUpdater &U) {
3684a303c417SDimitry Andric Function &F = *L.getHeader()->getParent();
3685a303c417SDimitry Andric (void)F;
3686e3b55780SDimitry Andric ProfileSummaryInfo *PSI = nullptr;
3687e3b55780SDimitry Andric if (auto OuterProxy =
3688e3b55780SDimitry Andric AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3689e3b55780SDimitry Andric .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3690e3b55780SDimitry Andric PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
3691eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
3692eb11fae6SDimitry Andric << "\n");
3693a303c417SDimitry Andric
3694e3b55780SDimitry Andric std::optional<MemorySSAUpdater> MSSAU;
3695d8e91e46SDimitry Andric if (AR.MSSA) {
3696d8e91e46SDimitry Andric MSSAU = MemorySSAUpdater(AR.MSSA);
3697d8e91e46SDimitry Andric if (VerifyMemorySSA)
3698d8e91e46SDimitry Andric AR.MSSA->verifyMemorySSA();
3699d8e91e46SDimitry Andric }
3700344a3780SDimitry Andric if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3701b1c73532SDimitry Andric &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
3702a303c417SDimitry Andric return PreservedAnalyses::all();
3703a303c417SDimitry Andric
3704d8e91e46SDimitry Andric if (AR.MSSA && VerifyMemorySSA)
3705d8e91e46SDimitry Andric AR.MSSA->verifyMemorySSA();
3706d8e91e46SDimitry Andric
3707a303c417SDimitry Andric // Historically this pass has had issues with the dominator tree so verify it
3708a303c417SDimitry Andric // in asserts builds.
3709eb11fae6SDimitry Andric assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
3710e6d15924SDimitry Andric
3711e6d15924SDimitry Andric auto PA = getLoopPassPreservedAnalyses();
37121d5ae102SDimitry Andric if (AR.MSSA)
3713e6d15924SDimitry Andric PA.preserve<MemorySSAAnalysis>();
3714e6d15924SDimitry Andric return PA;
3715a303c417SDimitry Andric }
3716a303c417SDimitry Andric
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)3717c0981da4SDimitry Andric void SimpleLoopUnswitchPass::printPipeline(
3718c0981da4SDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
3719c0981da4SDimitry Andric static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
3720c0981da4SDimitry Andric OS, MapClassName2PassName);
3721c0981da4SDimitry Andric
37227fa27ce4SDimitry Andric OS << '<';
3723c0981da4SDimitry Andric OS << (NonTrivial ? "" : "no-") << "nontrivial;";
3724c0981da4SDimitry Andric OS << (Trivial ? "" : "no-") << "trivial";
37257fa27ce4SDimitry Andric OS << '>';
3726c0981da4SDimitry Andric }
3727