1e6d15924SDimitry Andric //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2e6d15924SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e6d15924SDimitry Andric //
7e6d15924SDimitry Andric //===----------------------------------------------------------------------===//
8e6d15924SDimitry Andric /// \file
9e6d15924SDimitry Andric /// Insert hardware loop intrinsics into loops which are deemed profitable by
10e6d15924SDimitry Andric /// the target, by querying TargetTransformInfo. A hardware loop comprises of
11e6d15924SDimitry Andric /// two intrinsics: one, outside the loop, to set the loop iteration count and
12e6d15924SDimitry Andric /// another, in the exit block, to decrement the counter. The decremented value
13e6d15924SDimitry Andric /// can either be carried through the loop via a phi or handled in some opaque
14e6d15924SDimitry Andric /// way by the target.
15e6d15924SDimitry Andric ///
16e6d15924SDimitry Andric //===----------------------------------------------------------------------===//
17e6d15924SDimitry Andric
187fa27ce4SDimitry Andric #include "llvm/CodeGen/HardwareLoops.h"
19e6d15924SDimitry Andric #include "llvm/ADT/Statistic.h"
20e6d15924SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
217fa27ce4SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
22e6d15924SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
23706b4fc4SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
24e6d15924SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
25cfca06d7SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
26e6d15924SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
27e6d15924SDimitry Andric #include "llvm/CodeGen/Passes.h"
28e6d15924SDimitry Andric #include "llvm/IR/BasicBlock.h"
29706b4fc4SDimitry Andric #include "llvm/IR/Constants.h"
30e6d15924SDimitry Andric #include "llvm/IR/Dominators.h"
31e6d15924SDimitry Andric #include "llvm/IR/IRBuilder.h"
32e6d15924SDimitry Andric #include "llvm/IR/Instructions.h"
33e6d15924SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
34e6d15924SDimitry Andric #include "llvm/IR/Value.h"
35706b4fc4SDimitry Andric #include "llvm/InitializePasses.h"
36706b4fc4SDimitry Andric #include "llvm/Pass.h"
37706b4fc4SDimitry Andric #include "llvm/PassRegistry.h"
38706b4fc4SDimitry Andric #include "llvm/Support/CommandLine.h"
39e6d15924SDimitry Andric #include "llvm/Support/Debug.h"
40e6d15924SDimitry Andric #include "llvm/Transforms/Utils.h"
41e6d15924SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42e6d15924SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
43e6d15924SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
44cfca06d7SDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
45e6d15924SDimitry Andric
46e6d15924SDimitry Andric #define DEBUG_TYPE "hardware-loops"
47e6d15924SDimitry Andric
48e6d15924SDimitry Andric #define HW_LOOPS_NAME "Hardware Loop Insertion"
49e6d15924SDimitry Andric
50e6d15924SDimitry Andric using namespace llvm;
51e6d15924SDimitry Andric
52e6d15924SDimitry Andric static cl::opt<bool>
53e6d15924SDimitry Andric ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
54e6d15924SDimitry Andric cl::desc("Force hardware loops intrinsics to be inserted"));
55e6d15924SDimitry Andric
56e6d15924SDimitry Andric static cl::opt<bool>
57e6d15924SDimitry Andric ForceHardwareLoopPHI(
58e6d15924SDimitry Andric "force-hardware-loop-phi", cl::Hidden, cl::init(false),
59e6d15924SDimitry Andric cl::desc("Force hardware loop counter to be updated through a phi"));
60e6d15924SDimitry Andric
61e6d15924SDimitry Andric static cl::opt<bool>
62e6d15924SDimitry Andric ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
63e6d15924SDimitry Andric cl::desc("Force allowance of nested hardware loops"));
64e6d15924SDimitry Andric
65e6d15924SDimitry Andric static cl::opt<unsigned>
66e6d15924SDimitry Andric LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
67e6d15924SDimitry Andric cl::desc("Set the loop decrement value"));
68e6d15924SDimitry Andric
69e6d15924SDimitry Andric static cl::opt<unsigned>
70e6d15924SDimitry Andric CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
71e6d15924SDimitry Andric cl::desc("Set the loop counter bitwidth"));
72e6d15924SDimitry Andric
73e6d15924SDimitry Andric static cl::opt<bool>
74e6d15924SDimitry Andric ForceGuardLoopEntry(
75e6d15924SDimitry Andric "force-hardware-loop-guard", cl::Hidden, cl::init(false),
76e6d15924SDimitry Andric cl::desc("Force generation of loop guard intrinsic"));
77e6d15924SDimitry Andric
78e6d15924SDimitry Andric STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
79e6d15924SDimitry Andric
80706b4fc4SDimitry Andric #ifndef NDEBUG
debugHWLoopFailure(const StringRef DebugMsg,Instruction * I)81706b4fc4SDimitry Andric static void debugHWLoopFailure(const StringRef DebugMsg,
82706b4fc4SDimitry Andric Instruction *I) {
83706b4fc4SDimitry Andric dbgs() << "HWLoops: " << DebugMsg;
84706b4fc4SDimitry Andric if (I)
85706b4fc4SDimitry Andric dbgs() << ' ' << *I;
86706b4fc4SDimitry Andric else
87706b4fc4SDimitry Andric dbgs() << '.';
88706b4fc4SDimitry Andric dbgs() << '\n';
89706b4fc4SDimitry Andric }
90706b4fc4SDimitry Andric #endif
91706b4fc4SDimitry Andric
92706b4fc4SDimitry Andric static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName,Loop * L,Instruction * I)93706b4fc4SDimitry Andric createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
94706b4fc4SDimitry Andric Value *CodeRegion = L->getHeader();
95706b4fc4SDimitry Andric DebugLoc DL = L->getStartLoc();
96706b4fc4SDimitry Andric
97706b4fc4SDimitry Andric if (I) {
98706b4fc4SDimitry Andric CodeRegion = I->getParent();
99706b4fc4SDimitry Andric // If there is no debug location attached to the instruction, revert back to
100706b4fc4SDimitry Andric // using the loop's.
101706b4fc4SDimitry Andric if (I->getDebugLoc())
102706b4fc4SDimitry Andric DL = I->getDebugLoc();
103706b4fc4SDimitry Andric }
104706b4fc4SDimitry Andric
105706b4fc4SDimitry Andric OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
106706b4fc4SDimitry Andric R << "hardware-loop not created: ";
107706b4fc4SDimitry Andric return R;
108706b4fc4SDimitry Andric }
109706b4fc4SDimitry Andric
110e6d15924SDimitry Andric namespace {
111e6d15924SDimitry Andric
reportHWLoopFailure(const StringRef Msg,const StringRef ORETag,OptimizationRemarkEmitter * ORE,Loop * TheLoop,Instruction * I=nullptr)112706b4fc4SDimitry Andric void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
113706b4fc4SDimitry Andric OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
114706b4fc4SDimitry Andric LLVM_DEBUG(debugHWLoopFailure(Msg, I));
115706b4fc4SDimitry Andric ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
116706b4fc4SDimitry Andric }
117706b4fc4SDimitry Andric
118e6d15924SDimitry Andric using TTI = TargetTransformInfo;
119e6d15924SDimitry Andric
1207fa27ce4SDimitry Andric class HardwareLoopsLegacy : public FunctionPass {
121e6d15924SDimitry Andric public:
122e6d15924SDimitry Andric static char ID;
123e6d15924SDimitry Andric
HardwareLoopsLegacy()1247fa27ce4SDimitry Andric HardwareLoopsLegacy() : FunctionPass(ID) {
1257fa27ce4SDimitry Andric initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
126e6d15924SDimitry Andric }
127e6d15924SDimitry Andric
128e6d15924SDimitry Andric bool runOnFunction(Function &F) override;
129e6d15924SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const130e6d15924SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
131e6d15924SDimitry Andric AU.addRequired<LoopInfoWrapperPass>();
132e6d15924SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>();
133e6d15924SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
134e6d15924SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
135e6d15924SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>();
1367fa27ce4SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>();
137e6d15924SDimitry Andric AU.addRequired<AssumptionCacheTracker>();
138e6d15924SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
139706b4fc4SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1407fa27ce4SDimitry Andric AU.addPreserved<BranchProbabilityInfoWrapperPass>();
141e6d15924SDimitry Andric }
1427fa27ce4SDimitry Andric };
143e6d15924SDimitry Andric
1447fa27ce4SDimitry Andric class HardwareLoopsImpl {
1457fa27ce4SDimitry Andric public:
HardwareLoopsImpl(ScalarEvolution & SE,LoopInfo & LI,bool PreserveLCSSA,DominatorTree & DT,const DataLayout & DL,const TargetTransformInfo & TTI,TargetLibraryInfo * TLI,AssumptionCache & AC,OptimizationRemarkEmitter * ORE,HardwareLoopOptions & Opts)1467fa27ce4SDimitry Andric HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
1477fa27ce4SDimitry Andric DominatorTree &DT, const DataLayout &DL,
1487fa27ce4SDimitry Andric const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
1497fa27ce4SDimitry Andric AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
1507fa27ce4SDimitry Andric HardwareLoopOptions &Opts)
1517fa27ce4SDimitry Andric : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
1527fa27ce4SDimitry Andric TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
1537fa27ce4SDimitry Andric
1547fa27ce4SDimitry Andric bool run(Function &F);
1557fa27ce4SDimitry Andric
1567fa27ce4SDimitry Andric private:
157e6d15924SDimitry Andric // Try to convert the given Loop into a hardware loop.
1587fa27ce4SDimitry Andric bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
159e6d15924SDimitry Andric
160e6d15924SDimitry Andric // Given that the target believes the loop to be profitable, try to
161e6d15924SDimitry Andric // convert it.
162e6d15924SDimitry Andric bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
163e6d15924SDimitry Andric
1647fa27ce4SDimitry Andric ScalarEvolution &SE;
1657fa27ce4SDimitry Andric LoopInfo &LI;
1667fa27ce4SDimitry Andric bool PreserveLCSSA;
1677fa27ce4SDimitry Andric DominatorTree &DT;
1687fa27ce4SDimitry Andric const DataLayout &DL;
1697fa27ce4SDimitry Andric const TargetTransformInfo &TTI;
1707fa27ce4SDimitry Andric TargetLibraryInfo *TLI = nullptr;
1717fa27ce4SDimitry Andric AssumptionCache &AC;
1727fa27ce4SDimitry Andric OptimizationRemarkEmitter *ORE;
1737fa27ce4SDimitry Andric HardwareLoopOptions &Opts;
174e6d15924SDimitry Andric bool MadeChange = false;
175e6d15924SDimitry Andric };
176e6d15924SDimitry Andric
177e6d15924SDimitry Andric class HardwareLoop {
178e6d15924SDimitry Andric // Expand the trip count scev into a value that we can use.
179e6d15924SDimitry Andric Value *InitLoopCount();
180e6d15924SDimitry Andric
181e6d15924SDimitry Andric // Insert the set_loop_iteration intrinsic.
182b60736ecSDimitry Andric Value *InsertIterationSetup(Value *LoopCountInit);
183e6d15924SDimitry Andric
184e6d15924SDimitry Andric // Insert the loop_decrement intrinsic.
185e6d15924SDimitry Andric void InsertLoopDec();
186e6d15924SDimitry Andric
187e6d15924SDimitry Andric // Insert the loop_decrement_reg intrinsic.
188e6d15924SDimitry Andric Instruction *InsertLoopRegDec(Value *EltsRem);
189e6d15924SDimitry Andric
190e6d15924SDimitry Andric // If the target requires the counter value to be updated in the loop,
191e6d15924SDimitry Andric // insert a phi to hold the value. The intended purpose is for use by
192e6d15924SDimitry Andric // loop_decrement_reg.
193e6d15924SDimitry Andric PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
194e6d15924SDimitry Andric
195e6d15924SDimitry Andric // Create a new cmp, that checks the returned value of loop_decrement*,
196e6d15924SDimitry Andric // and update the exit branch to use it.
197e6d15924SDimitry Andric void UpdateBranch(Value *EltsRem);
198e6d15924SDimitry Andric
199e6d15924SDimitry Andric public:
HardwareLoop(HardwareLoopInfo & Info,ScalarEvolution & SE,const DataLayout & DL,OptimizationRemarkEmitter * ORE,HardwareLoopOptions & Opts)200e6d15924SDimitry Andric HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
201706b4fc4SDimitry Andric const DataLayout &DL,
2027fa27ce4SDimitry Andric OptimizationRemarkEmitter *ORE,
2037fa27ce4SDimitry Andric HardwareLoopOptions &Opts) :
2047fa27ce4SDimitry Andric SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
205c0981da4SDimitry Andric ExitCount(Info.ExitCount),
206e6d15924SDimitry Andric CountType(Info.CountType),
207e6d15924SDimitry Andric ExitBranch(Info.ExitBranch),
208e6d15924SDimitry Andric LoopDecrement(Info.LoopDecrement),
209e6d15924SDimitry Andric UsePHICounter(Info.CounterInReg),
210e6d15924SDimitry Andric UseLoopGuard(Info.PerformEntryTest) { }
211e6d15924SDimitry Andric
212e6d15924SDimitry Andric void Create();
213e6d15924SDimitry Andric
214e6d15924SDimitry Andric private:
215e6d15924SDimitry Andric ScalarEvolution &SE;
216e6d15924SDimitry Andric const DataLayout &DL;
217706b4fc4SDimitry Andric OptimizationRemarkEmitter *ORE = nullptr;
2187fa27ce4SDimitry Andric HardwareLoopOptions &Opts;
219e6d15924SDimitry Andric Loop *L = nullptr;
220e6d15924SDimitry Andric Module *M = nullptr;
221c0981da4SDimitry Andric const SCEV *ExitCount = nullptr;
222e6d15924SDimitry Andric Type *CountType = nullptr;
223e6d15924SDimitry Andric BranchInst *ExitBranch = nullptr;
224e6d15924SDimitry Andric Value *LoopDecrement = nullptr;
225e6d15924SDimitry Andric bool UsePHICounter = false;
226e6d15924SDimitry Andric bool UseLoopGuard = false;
227e6d15924SDimitry Andric BasicBlock *BeginBB = nullptr;
228e6d15924SDimitry Andric };
229e6d15924SDimitry Andric }
230e6d15924SDimitry Andric
2317fa27ce4SDimitry Andric char HardwareLoopsLegacy::ID = 0;
232e6d15924SDimitry Andric
runOnFunction(Function & F)2337fa27ce4SDimitry Andric bool HardwareLoopsLegacy::runOnFunction(Function &F) {
234e6d15924SDimitry Andric if (skipFunction(F))
235e6d15924SDimitry Andric return false;
236e6d15924SDimitry Andric
237e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
238e6d15924SDimitry Andric
2397fa27ce4SDimitry Andric auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2407fa27ce4SDimitry Andric auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2417fa27ce4SDimitry Andric auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2427fa27ce4SDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
243ac9a064cSDimitry Andric auto &DL = F.getDataLayout();
2447fa27ce4SDimitry Andric auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
245e6d15924SDimitry Andric auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2467fa27ce4SDimitry Andric auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2477fa27ce4SDimitry Andric auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2487fa27ce4SDimitry Andric bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
249e6d15924SDimitry Andric
2507fa27ce4SDimitry Andric HardwareLoopOptions Opts;
2517fa27ce4SDimitry Andric if (ForceHardwareLoops.getNumOccurrences())
2527fa27ce4SDimitry Andric Opts.setForce(ForceHardwareLoops);
2537fa27ce4SDimitry Andric if (ForceHardwareLoopPHI.getNumOccurrences())
2547fa27ce4SDimitry Andric Opts.setForcePhi(ForceHardwareLoopPHI);
2557fa27ce4SDimitry Andric if (ForceNestedLoop.getNumOccurrences())
2567fa27ce4SDimitry Andric Opts.setForceNested(ForceNestedLoop);
2577fa27ce4SDimitry Andric if (ForceGuardLoopEntry.getNumOccurrences())
2587fa27ce4SDimitry Andric Opts.setForceGuard(ForceGuardLoopEntry);
2597fa27ce4SDimitry Andric if (LoopDecrement.getNumOccurrences())
2607fa27ce4SDimitry Andric Opts.setDecrement(LoopDecrement);
2617fa27ce4SDimitry Andric if (CounterBitWidth.getNumOccurrences())
2627fa27ce4SDimitry Andric Opts.setCounterBitwidth(CounterBitWidth);
2637fa27ce4SDimitry Andric
2647fa27ce4SDimitry Andric HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
2657fa27ce4SDimitry Andric Opts);
2667fa27ce4SDimitry Andric return Impl.run(F);
2677fa27ce4SDimitry Andric }
2687fa27ce4SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)2697fa27ce4SDimitry Andric PreservedAnalyses HardwareLoopsPass::run(Function &F,
2707fa27ce4SDimitry Andric FunctionAnalysisManager &AM) {
2717fa27ce4SDimitry Andric auto &LI = AM.getResult<LoopAnalysis>(F);
2727fa27ce4SDimitry Andric auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
2737fa27ce4SDimitry Andric auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
2747fa27ce4SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F);
2757fa27ce4SDimitry Andric auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
2767fa27ce4SDimitry Andric auto &AC = AM.getResult<AssumptionAnalysis>(F);
2777fa27ce4SDimitry Andric auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
278ac9a064cSDimitry Andric auto &DL = F.getDataLayout();
2797fa27ce4SDimitry Andric
2807fa27ce4SDimitry Andric HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
2817fa27ce4SDimitry Andric bool Changed = Impl.run(F);
2827fa27ce4SDimitry Andric if (!Changed)
2837fa27ce4SDimitry Andric return PreservedAnalyses::all();
2847fa27ce4SDimitry Andric
2857fa27ce4SDimitry Andric PreservedAnalyses PA;
2867fa27ce4SDimitry Andric PA.preserve<LoopAnalysis>();
2877fa27ce4SDimitry Andric PA.preserve<ScalarEvolutionAnalysis>();
2887fa27ce4SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
2897fa27ce4SDimitry Andric PA.preserve<BranchProbabilityAnalysis>();
2907fa27ce4SDimitry Andric return PA;
2917fa27ce4SDimitry Andric }
2927fa27ce4SDimitry Andric
run(Function & F)2937fa27ce4SDimitry Andric bool HardwareLoopsImpl::run(Function &F) {
294ac9a064cSDimitry Andric LLVMContext &Ctx = F.getContext();
2957fa27ce4SDimitry Andric for (Loop *L : LI)
296b60736ecSDimitry Andric if (L->isOutermost())
2977fa27ce4SDimitry Andric TryConvertLoop(L, Ctx);
298e6d15924SDimitry Andric return MadeChange;
299e6d15924SDimitry Andric }
300e6d15924SDimitry Andric
301e6d15924SDimitry Andric // Return true if the search should stop, which will be when an inner loop is
302e6d15924SDimitry Andric // converted and the parent loop doesn't support containing a hardware loop.
TryConvertLoop(Loop * L,LLVMContext & Ctx)3037fa27ce4SDimitry Andric bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
304e6d15924SDimitry Andric // Process nested loops first.
305cfca06d7SDimitry Andric bool AnyChanged = false;
306cfca06d7SDimitry Andric for (Loop *SL : *L)
3077fa27ce4SDimitry Andric AnyChanged |= TryConvertLoop(SL, Ctx);
308cfca06d7SDimitry Andric if (AnyChanged) {
309706b4fc4SDimitry Andric reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
310706b4fc4SDimitry Andric ORE, L);
311e6d15924SDimitry Andric return true; // Stop search.
312706b4fc4SDimitry Andric }
313cfca06d7SDimitry Andric
314cfca06d7SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
315e6d15924SDimitry Andric
316e6d15924SDimitry Andric HardwareLoopInfo HWLoopInfo(L);
3177fa27ce4SDimitry Andric if (!HWLoopInfo.canAnalyze(LI)) {
318706b4fc4SDimitry Andric reportHWLoopFailure("cannot analyze loop, irreducible control flow",
319706b4fc4SDimitry Andric "HWLoopCannotAnalyze", ORE, L);
320e6d15924SDimitry Andric return false;
321706b4fc4SDimitry Andric }
322e6d15924SDimitry Andric
3237fa27ce4SDimitry Andric if (!Opts.Force &&
3247fa27ce4SDimitry Andric !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
325706b4fc4SDimitry Andric reportHWLoopFailure("it's not profitable to create a hardware-loop",
326706b4fc4SDimitry Andric "HWLoopNotProfitable", ORE, L);
327706b4fc4SDimitry Andric return false;
328706b4fc4SDimitry Andric }
329e6d15924SDimitry Andric
330e6d15924SDimitry Andric // Allow overriding of the counter width and loop decrement value.
3317fa27ce4SDimitry Andric if (Opts.Bitwidth.has_value()) {
3327fa27ce4SDimitry Andric HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
333e6d15924SDimitry Andric }
334e6d15924SDimitry Andric
3357fa27ce4SDimitry Andric if (Opts.Decrement.has_value())
3367fa27ce4SDimitry Andric HWLoopInfo.LoopDecrement =
3377fa27ce4SDimitry Andric ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
3387fa27ce4SDimitry Andric
3397fa27ce4SDimitry Andric MadeChange |= TryConvertLoop(HWLoopInfo);
3407fa27ce4SDimitry Andric return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
3417fa27ce4SDimitry Andric }
3427fa27ce4SDimitry Andric
TryConvertLoop(HardwareLoopInfo & HWLoopInfo)3437fa27ce4SDimitry Andric bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
344e6d15924SDimitry Andric
345e6d15924SDimitry Andric Loop *L = HWLoopInfo.L;
346e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
347e6d15924SDimitry Andric
3487fa27ce4SDimitry Andric if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
3497fa27ce4SDimitry Andric Opts.getForcePhi())) {
350706b4fc4SDimitry Andric // TODO: there can be many reasons a loop is not considered a
351706b4fc4SDimitry Andric // candidate, so we should let isHardwareLoopCandidate fill in the
352706b4fc4SDimitry Andric // reason and then report a better message here.
353706b4fc4SDimitry Andric reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
354e6d15924SDimitry Andric return false;
355706b4fc4SDimitry Andric }
356e6d15924SDimitry Andric
357e6d15924SDimitry Andric assert(
358c0981da4SDimitry Andric (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
359e6d15924SDimitry Andric "Hardware Loop must have set exit info.");
360e6d15924SDimitry Andric
361e6d15924SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader();
362e6d15924SDimitry Andric
363e6d15924SDimitry Andric // If we don't have a preheader, then insert one.
364e6d15924SDimitry Andric if (!Preheader)
3657fa27ce4SDimitry Andric Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
366e6d15924SDimitry Andric if (!Preheader)
367e6d15924SDimitry Andric return false;
368e6d15924SDimitry Andric
3697fa27ce4SDimitry Andric HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
370e6d15924SDimitry Andric HWLoop.Create();
371e6d15924SDimitry Andric ++NumHWLoops;
372e6d15924SDimitry Andric return true;
373e6d15924SDimitry Andric }
374e6d15924SDimitry Andric
Create()375e6d15924SDimitry Andric void HardwareLoop::Create() {
376e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
377e6d15924SDimitry Andric
378e6d15924SDimitry Andric Value *LoopCountInit = InitLoopCount();
379706b4fc4SDimitry Andric if (!LoopCountInit) {
380706b4fc4SDimitry Andric reportHWLoopFailure("could not safely create a loop count expression",
381706b4fc4SDimitry Andric "HWLoopNotSafe", ORE, L);
382e6d15924SDimitry Andric return;
383706b4fc4SDimitry Andric }
384e6d15924SDimitry Andric
385b60736ecSDimitry Andric Value *Setup = InsertIterationSetup(LoopCountInit);
386e6d15924SDimitry Andric
3877fa27ce4SDimitry Andric if (UsePHICounter || Opts.ForcePhi) {
388e6d15924SDimitry Andric Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
389b60736ecSDimitry Andric Value *EltsRem = InsertPHICounter(Setup, LoopDec);
390e6d15924SDimitry Andric LoopDec->setOperand(0, EltsRem);
391e6d15924SDimitry Andric UpdateBranch(LoopDec);
392e6d15924SDimitry Andric } else
393e6d15924SDimitry Andric InsertLoopDec();
394e6d15924SDimitry Andric
395e6d15924SDimitry Andric // Run through the basic blocks of the loop and see if any of them have dead
396e6d15924SDimitry Andric // PHIs that can be removed.
3974b4fe385SDimitry Andric for (auto *I : L->blocks())
398e6d15924SDimitry Andric DeleteDeadPHIs(I);
399e6d15924SDimitry Andric }
400e6d15924SDimitry Andric
CanGenerateTest(Loop * L,Value * Count)401e6d15924SDimitry Andric static bool CanGenerateTest(Loop *L, Value *Count) {
402e6d15924SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader();
403e6d15924SDimitry Andric if (!Preheader->getSinglePredecessor())
404e6d15924SDimitry Andric return false;
405e6d15924SDimitry Andric
406e6d15924SDimitry Andric BasicBlock *Pred = Preheader->getSinglePredecessor();
407e6d15924SDimitry Andric if (!isa<BranchInst>(Pred->getTerminator()))
408e6d15924SDimitry Andric return false;
409e6d15924SDimitry Andric
410e6d15924SDimitry Andric auto *BI = cast<BranchInst>(Pred->getTerminator());
411e6d15924SDimitry Andric if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
412e6d15924SDimitry Andric return false;
413e6d15924SDimitry Andric
414e6d15924SDimitry Andric // Check that the icmp is checking for equality of Count and zero and that
415e6d15924SDimitry Andric // a non-zero value results in entering the loop.
416e6d15924SDimitry Andric auto ICmp = cast<ICmpInst>(BI->getCondition());
417e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
418e6d15924SDimitry Andric if (!ICmp->isEquality())
419e6d15924SDimitry Andric return false;
420e6d15924SDimitry Andric
421e6d15924SDimitry Andric auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
422e6d15924SDimitry Andric if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
423e6d15924SDimitry Andric return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
424e6d15924SDimitry Andric return false;
425e6d15924SDimitry Andric };
426e6d15924SDimitry Andric
427c0981da4SDimitry Andric // Check if Count is a zext.
428c0981da4SDimitry Andric Value *CountBefZext =
429c0981da4SDimitry Andric isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
430c0981da4SDimitry Andric
431c0981da4SDimitry Andric if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
432c0981da4SDimitry Andric !IsCompareZero(ICmp, CountBefZext, 0) &&
433c0981da4SDimitry Andric !IsCompareZero(ICmp, CountBefZext, 1))
434e6d15924SDimitry Andric return false;
435e6d15924SDimitry Andric
436e6d15924SDimitry Andric unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
437e6d15924SDimitry Andric if (BI->getSuccessor(SuccIdx) != Preheader)
438e6d15924SDimitry Andric return false;
439e6d15924SDimitry Andric
440e6d15924SDimitry Andric return true;
441e6d15924SDimitry Andric }
442e6d15924SDimitry Andric
InitLoopCount()443e6d15924SDimitry Andric Value *HardwareLoop::InitLoopCount() {
444e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
445e6d15924SDimitry Andric // Can we replace a conditional branch with an intrinsic that sets the
446e6d15924SDimitry Andric // loop counter and tests that is not zero?
447e6d15924SDimitry Andric
448e6d15924SDimitry Andric SCEVExpander SCEVE(SE, DL, "loopcnt");
449c0981da4SDimitry Andric if (!ExitCount->getType()->isPointerTy() &&
450c0981da4SDimitry Andric ExitCount->getType() != CountType)
451c0981da4SDimitry Andric ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
452c0981da4SDimitry Andric
453c0981da4SDimitry Andric ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
454e6d15924SDimitry Andric
455e6d15924SDimitry Andric // If we're trying to use the 'test and set' form of the intrinsic, we need
456e6d15924SDimitry Andric // to replace a conditional branch that is controlling entry to the loop. It
457e6d15924SDimitry Andric // is likely (guaranteed?) that the preheader has an unconditional branch to
458e6d15924SDimitry Andric // the loop header, so also check if it has a single predecessor.
459c0981da4SDimitry Andric if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
460c0981da4SDimitry Andric SE.getZero(ExitCount->getType()))) {
461e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
4627fa27ce4SDimitry Andric if (Opts.ForceGuard)
4637fa27ce4SDimitry Andric UseLoopGuard = true;
464e6d15924SDimitry Andric } else
465e6d15924SDimitry Andric UseLoopGuard = false;
466e6d15924SDimitry Andric
467e6d15924SDimitry Andric BasicBlock *BB = L->getLoopPreheader();
468e6d15924SDimitry Andric if (UseLoopGuard && BB->getSinglePredecessor() &&
469b60736ecSDimitry Andric cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
470b60736ecSDimitry Andric BasicBlock *Predecessor = BB->getSinglePredecessor();
471b60736ecSDimitry Andric // If it's not safe to create a while loop then don't force it and create a
472b60736ecSDimitry Andric // do-while loop instead
4734b4fe385SDimitry Andric if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
474b60736ecSDimitry Andric UseLoopGuard = false;
475b60736ecSDimitry Andric else
476b60736ecSDimitry Andric BB = Predecessor;
477b60736ecSDimitry Andric }
478e6d15924SDimitry Andric
4794b4fe385SDimitry Andric if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
480c0981da4SDimitry Andric LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
481c0981da4SDimitry Andric << *ExitCount << "\n");
482e6d15924SDimitry Andric return nullptr;
483e6d15924SDimitry Andric }
484e6d15924SDimitry Andric
485c0981da4SDimitry Andric Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
486e6d15924SDimitry Andric BB->getTerminator());
487e6d15924SDimitry Andric
488e6d15924SDimitry Andric // FIXME: We've expanded Count where we hope to insert the counter setting
489e6d15924SDimitry Andric // intrinsic. But, in the case of the 'test and set' form, we may fallback to
490e6d15924SDimitry Andric // the just 'set' form and in which case the insertion block is most likely
491e6d15924SDimitry Andric // different. It means there will be instruction(s) in a block that possibly
492e6d15924SDimitry Andric // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
493e6d15924SDimitry Andric // but it's doesn't appear to work in all cases.
494e6d15924SDimitry Andric
495e6d15924SDimitry Andric UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
496e6d15924SDimitry Andric BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
497e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
498e6d15924SDimitry Andric << " - Expanded Count in " << BB->getName() << "\n"
499e6d15924SDimitry Andric << " - Will insert set counter intrinsic into: "
500e6d15924SDimitry Andric << BeginBB->getName() << "\n");
501e6d15924SDimitry Andric return Count;
502e6d15924SDimitry Andric }
503e6d15924SDimitry Andric
InsertIterationSetup(Value * LoopCountInit)504b60736ecSDimitry Andric Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
505e6d15924SDimitry Andric IRBuilder<> Builder(BeginBB->getTerminator());
506ac9a064cSDimitry Andric if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
507ac9a064cSDimitry Andric Builder.setIsFPConstrained(true);
508e6d15924SDimitry Andric Type *Ty = LoopCountInit->getType();
5097fa27ce4SDimitry Andric bool UsePhi = UsePHICounter || Opts.ForcePhi;
510344a3780SDimitry Andric Intrinsic::ID ID = UseLoopGuard
511344a3780SDimitry Andric ? (UsePhi ? Intrinsic::test_start_loop_iterations
512344a3780SDimitry Andric : Intrinsic::test_set_loop_iterations)
513b60736ecSDimitry Andric : (UsePhi ? Intrinsic::start_loop_iterations
514b60736ecSDimitry Andric : Intrinsic::set_loop_iterations);
515e6d15924SDimitry Andric Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
516344a3780SDimitry Andric Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
517e6d15924SDimitry Andric
518e6d15924SDimitry Andric // Use the return value of the intrinsic to control the entry of the loop.
519e6d15924SDimitry Andric if (UseLoopGuard) {
520e6d15924SDimitry Andric assert((isa<BranchInst>(BeginBB->getTerminator()) &&
521e6d15924SDimitry Andric cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
522e6d15924SDimitry Andric "Expected conditional branch");
523344a3780SDimitry Andric
524344a3780SDimitry Andric Value *SetCount =
525344a3780SDimitry Andric UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
526e6d15924SDimitry Andric auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
527e6d15924SDimitry Andric LoopGuard->setCondition(SetCount);
528e6d15924SDimitry Andric if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
529e6d15924SDimitry Andric LoopGuard->swapSuccessors();
530e6d15924SDimitry Andric }
531344a3780SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
532344a3780SDimitry Andric << "\n");
533344a3780SDimitry Andric if (UsePhi && UseLoopGuard)
534344a3780SDimitry Andric LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
535344a3780SDimitry Andric return !UsePhi ? LoopCountInit : LoopSetup;
536e6d15924SDimitry Andric }
537e6d15924SDimitry Andric
InsertLoopDec()538e6d15924SDimitry Andric void HardwareLoop::InsertLoopDec() {
539e6d15924SDimitry Andric IRBuilder<> CondBuilder(ExitBranch);
540ac9a064cSDimitry Andric if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
541ac9a064cSDimitry Andric Attribute::StrictFP))
542ac9a064cSDimitry Andric CondBuilder.setIsFPConstrained(true);
543e6d15924SDimitry Andric
544e6d15924SDimitry Andric Function *DecFunc =
545e6d15924SDimitry Andric Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
546e6d15924SDimitry Andric LoopDecrement->getType());
547e6d15924SDimitry Andric Value *Ops[] = { LoopDecrement };
548e6d15924SDimitry Andric Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
549e6d15924SDimitry Andric Value *OldCond = ExitBranch->getCondition();
550e6d15924SDimitry Andric ExitBranch->setCondition(NewCond);
551e6d15924SDimitry Andric
552e6d15924SDimitry Andric // The false branch must exit the loop.
553e6d15924SDimitry Andric if (!L->contains(ExitBranch->getSuccessor(0)))
554e6d15924SDimitry Andric ExitBranch->swapSuccessors();
555e6d15924SDimitry Andric
556e6d15924SDimitry Andric // The old condition may be dead now, and may have even created a dead PHI
557e6d15924SDimitry Andric // (the original induction variable).
558e6d15924SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(OldCond);
559e6d15924SDimitry Andric
560e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
561e6d15924SDimitry Andric }
562e6d15924SDimitry Andric
InsertLoopRegDec(Value * EltsRem)563e6d15924SDimitry Andric Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
564e6d15924SDimitry Andric IRBuilder<> CondBuilder(ExitBranch);
565ac9a064cSDimitry Andric if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
566ac9a064cSDimitry Andric Attribute::StrictFP))
567ac9a064cSDimitry Andric CondBuilder.setIsFPConstrained(true);
568e6d15924SDimitry Andric
569e6d15924SDimitry Andric Function *DecFunc =
570e6d15924SDimitry Andric Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
571cfca06d7SDimitry Andric { EltsRem->getType() });
572e6d15924SDimitry Andric Value *Ops[] = { EltsRem, LoopDecrement };
573e6d15924SDimitry Andric Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
574e6d15924SDimitry Andric
575e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
576e6d15924SDimitry Andric return cast<Instruction>(Call);
577e6d15924SDimitry Andric }
578e6d15924SDimitry Andric
InsertPHICounter(Value * NumElts,Value * EltsRem)579e6d15924SDimitry Andric PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
580e6d15924SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader();
581e6d15924SDimitry Andric BasicBlock *Header = L->getHeader();
582e6d15924SDimitry Andric BasicBlock *Latch = ExitBranch->getParent();
583ac9a064cSDimitry Andric IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
584e6d15924SDimitry Andric PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
585e6d15924SDimitry Andric Index->addIncoming(NumElts, Preheader);
586e6d15924SDimitry Andric Index->addIncoming(EltsRem, Latch);
587e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
588e6d15924SDimitry Andric return Index;
589e6d15924SDimitry Andric }
590e6d15924SDimitry Andric
UpdateBranch(Value * EltsRem)591e6d15924SDimitry Andric void HardwareLoop::UpdateBranch(Value *EltsRem) {
592e6d15924SDimitry Andric IRBuilder<> CondBuilder(ExitBranch);
593e6d15924SDimitry Andric Value *NewCond =
594e6d15924SDimitry Andric CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
595e6d15924SDimitry Andric Value *OldCond = ExitBranch->getCondition();
596e6d15924SDimitry Andric ExitBranch->setCondition(NewCond);
597e6d15924SDimitry Andric
598e6d15924SDimitry Andric // The false branch must exit the loop.
599e6d15924SDimitry Andric if (!L->contains(ExitBranch->getSuccessor(0)))
600e6d15924SDimitry Andric ExitBranch->swapSuccessors();
601e6d15924SDimitry Andric
602e6d15924SDimitry Andric // The old condition may be dead now, and may have even created a dead PHI
603e6d15924SDimitry Andric // (the original induction variable).
604e6d15924SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(OldCond);
605e6d15924SDimitry Andric }
606e6d15924SDimitry Andric
INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy,DEBUG_TYPE,HW_LOOPS_NAME,false,false)6077fa27ce4SDimitry Andric INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
608e6d15924SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
609e6d15924SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
610e6d15924SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
611706b4fc4SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
6127fa27ce4SDimitry Andric INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
613e6d15924SDimitry Andric
6147fa27ce4SDimitry Andric FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
615