1600c6fa1SEd Schouten //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2600c6fa1SEd Schouten //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6600c6fa1SEd Schouten //
7600c6fa1SEd Schouten //===----------------------------------------------------------------------===//
8600c6fa1SEd Schouten //
9600c6fa1SEd Schouten // This pass performs partial inlining, typically by inlining an if statement
10600c6fa1SEd Schouten // that surrounds the body of the function.
11600c6fa1SEd Schouten //
12600c6fa1SEd Schouten //===----------------------------------------------------------------------===//
13600c6fa1SEd Schouten
1401095a5dSDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h"
15044eb2f6SDimitry Andric #include "llvm/ADT/DenseMap.h"
16044eb2f6SDimitry Andric #include "llvm/ADT/DenseSet.h"
177fa27ce4SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
18044eb2f6SDimitry Andric #include "llvm/ADT/STLExtras.h"
19044eb2f6SDimitry Andric #include "llvm/ADT/SmallVector.h"
204a16efa3SDimitry Andric #include "llvm/ADT/Statistic.h"
21b915e9e0SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
22b915e9e0SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
23a303c417SDimitry Andric #include "llvm/Analysis/InlineCost.h"
24b915e9e0SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
25044eb2f6SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
26a303c417SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
27a303c417SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
28a303c417SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
29044eb2f6SDimitry Andric #include "llvm/IR/Attributes.h"
30044eb2f6SDimitry Andric #include "llvm/IR/BasicBlock.h"
315ca98fd9SDimitry Andric #include "llvm/IR/CFG.h"
32044eb2f6SDimitry Andric #include "llvm/IR/DebugLoc.h"
3312f3ca4cSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
345ca98fd9SDimitry Andric #include "llvm/IR/Dominators.h"
35044eb2f6SDimitry Andric #include "llvm/IR/Function.h"
36044eb2f6SDimitry Andric #include "llvm/IR/InstrTypes.h"
37044eb2f6SDimitry Andric #include "llvm/IR/Instruction.h"
384a16efa3SDimitry Andric #include "llvm/IR/Instructions.h"
39044eb2f6SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
40044eb2f6SDimitry Andric #include "llvm/IR/Intrinsics.h"
414a16efa3SDimitry Andric #include "llvm/IR/Module.h"
42145449b1SDimitry Andric #include "llvm/IR/Operator.h"
43e3b55780SDimitry Andric #include "llvm/IR/ProfDataUtils.h"
44044eb2f6SDimitry Andric #include "llvm/IR/User.h"
45044eb2f6SDimitry Andric #include "llvm/Support/BlockFrequency.h"
46044eb2f6SDimitry Andric #include "llvm/Support/BranchProbability.h"
47044eb2f6SDimitry Andric #include "llvm/Support/Casting.h"
48044eb2f6SDimitry Andric #include "llvm/Support/CommandLine.h"
49044eb2f6SDimitry Andric #include "llvm/Support/ErrorHandling.h"
5001095a5dSDimitry Andric #include "llvm/Transforms/IPO.h"
51600c6fa1SEd Schouten #include "llvm/Transforms/Utils/Cloning.h"
5258b69754SDimitry Andric #include "llvm/Transforms/Utils/CodeExtractor.h"
53044eb2f6SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
54044eb2f6SDimitry Andric #include <algorithm>
55044eb2f6SDimitry Andric #include <cassert>
56044eb2f6SDimitry Andric #include <cstdint>
57044eb2f6SDimitry Andric #include <memory>
58044eb2f6SDimitry Andric #include <tuple>
59044eb2f6SDimitry Andric #include <vector>
60044eb2f6SDimitry Andric
61600c6fa1SEd Schouten using namespace llvm;
62600c6fa1SEd Schouten
6312f3ca4cSDimitry Andric #define DEBUG_TYPE "partial-inlining"
645ca98fd9SDimitry Andric
65a303c417SDimitry Andric STATISTIC(NumPartialInlined,
66a303c417SDimitry Andric "Number of callsites functions partially inlined into.");
67044eb2f6SDimitry Andric STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68044eb2f6SDimitry Andric "cold outlined regions were partially "
69044eb2f6SDimitry Andric "inlined into its caller(s).");
70044eb2f6SDimitry Andric STATISTIC(NumColdRegionsFound,
71044eb2f6SDimitry Andric "Number of cold single entry/exit regions found.");
72044eb2f6SDimitry Andric STATISTIC(NumColdRegionsOutlined,
73044eb2f6SDimitry Andric "Number of cold single entry/exit regions outlined.");
74b2f21fb0SEd Schouten
7512f3ca4cSDimitry Andric // Command line option to disable partial-inlining. The default is false:
7612f3ca4cSDimitry Andric static cl::opt<bool>
7712f3ca4cSDimitry Andric DisablePartialInlining("disable-partial-inlining", cl::init(false),
78044eb2f6SDimitry Andric cl::Hidden, cl::desc("Disable partial inlining"));
79044eb2f6SDimitry Andric // Command line option to disable multi-region partial-inlining. The default is
80044eb2f6SDimitry Andric // false:
81044eb2f6SDimitry Andric static cl::opt<bool> DisableMultiRegionPartialInline(
82044eb2f6SDimitry Andric "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83044eb2f6SDimitry Andric cl::desc("Disable multi-region partial inlining"));
84044eb2f6SDimitry Andric
85044eb2f6SDimitry Andric // Command line option to force outlining in regions with live exit variables.
86044eb2f6SDimitry Andric // The default is false:
87044eb2f6SDimitry Andric static cl::opt<bool>
88044eb2f6SDimitry Andric ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89044eb2f6SDimitry Andric cl::desc("Force outline regions with live exits"));
90044eb2f6SDimitry Andric
91044eb2f6SDimitry Andric // Command line option to enable marking outline functions with Cold Calling
92044eb2f6SDimitry Andric // Convention. The default is false:
93044eb2f6SDimitry Andric static cl::opt<bool>
94044eb2f6SDimitry Andric MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95044eb2f6SDimitry Andric cl::desc("Mark outline function calls with ColdCC"));
96044eb2f6SDimitry Andric
976b3f41edSDimitry Andric // This is an option used by testing:
986b3f41edSDimitry Andric static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99145449b1SDimitry Andric
1006b3f41edSDimitry Andric cl::ReallyHidden,
1016b3f41edSDimitry Andric cl::desc("Skip Cost Analysis"));
102044eb2f6SDimitry Andric // Used to determine if a cold region is worth outlining based on
103044eb2f6SDimitry Andric // its inlining cost compared to the original function. Default is set at 10%.
104044eb2f6SDimitry Andric // ie. if the cold region reduces the inlining cost of the original function by
105044eb2f6SDimitry Andric // at least 10%.
106044eb2f6SDimitry Andric static cl::opt<float> MinRegionSizeRatio(
107044eb2f6SDimitry Andric "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108044eb2f6SDimitry Andric cl::desc("Minimum ratio comparing relative sizes of each "
109044eb2f6SDimitry Andric "outline candidate and original function"));
110044eb2f6SDimitry Andric // Used to tune the minimum number of execution counts needed in the predecessor
111044eb2f6SDimitry Andric // block to the cold edge. ie. confidence interval.
112044eb2f6SDimitry Andric static cl::opt<unsigned>
113044eb2f6SDimitry Andric MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114044eb2f6SDimitry Andric cl::desc("Minimum block executions to consider "
115044eb2f6SDimitry Andric "its BranchProbabilityInfo valid"));
116044eb2f6SDimitry Andric // Used to determine when an edge is considered cold. Default is set to 10%. ie.
117044eb2f6SDimitry Andric // if the branch probability is 10% or less, then it is deemed as 'cold'.
118044eb2f6SDimitry Andric static cl::opt<float> ColdBranchRatio(
119044eb2f6SDimitry Andric "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120044eb2f6SDimitry Andric cl::desc("Minimum BranchProbability to consider a region cold."));
12112f3ca4cSDimitry Andric
122a303c417SDimitry Andric static cl::opt<unsigned> MaxNumInlineBlocks(
123a303c417SDimitry Andric "max-num-inline-blocks", cl::init(5), cl::Hidden,
124044eb2f6SDimitry Andric cl::desc("Max number of blocks to be partially inlined"));
125a303c417SDimitry Andric
12612f3ca4cSDimitry Andric // Command line option to set the maximum number of partial inlining allowed
12712f3ca4cSDimitry Andric // for the module. The default value of -1 means no limit.
12812f3ca4cSDimitry Andric static cl::opt<int> MaxNumPartialInlining(
129145449b1SDimitry Andric "max-partial-inlining", cl::init(-1), cl::Hidden,
13012f3ca4cSDimitry Andric cl::desc("Max number of partial inlining. The default is unlimited"));
13112f3ca4cSDimitry Andric
1326b3f41edSDimitry Andric // Used only when PGO or user annotated branch data is absent. It is
1336b3f41edSDimitry Andric // the least value that is used to weigh the outline region. If BFI
1346b3f41edSDimitry Andric // produces larger value, the BFI value will be used.
1356b3f41edSDimitry Andric static cl::opt<int>
1366b3f41edSDimitry Andric OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
137145449b1SDimitry Andric cl::Hidden,
1386b3f41edSDimitry Andric cl::desc("Relative frequency of outline region to "
1396b3f41edSDimitry Andric "the entry block"));
1406b3f41edSDimitry Andric
141d288ef4cSDimitry Andric static cl::opt<unsigned> ExtraOutliningPenalty(
142d288ef4cSDimitry Andric "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143d288ef4cSDimitry Andric cl::desc("A debug option to add additional penalty to the computed one."));
144d288ef4cSDimitry Andric
145600c6fa1SEd Schouten namespace {
146a303c417SDimitry Andric
147a303c417SDimitry Andric struct FunctionOutliningInfo {
148044eb2f6SDimitry Andric FunctionOutliningInfo() = default;
149044eb2f6SDimitry Andric
150a303c417SDimitry Andric // Returns the number of blocks to be inlined including all blocks
151a303c417SDimitry Andric // in Entries and one return block.
getNumInlinedBlocks__anonb7b3c81f0111::FunctionOutliningInfo152b60736ecSDimitry Andric unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153a303c417SDimitry Andric
154a303c417SDimitry Andric // A set of blocks including the function entry that guard
155a303c417SDimitry Andric // the region to be outlined.
156a303c417SDimitry Andric SmallVector<BasicBlock *, 4> Entries;
157044eb2f6SDimitry Andric
158a303c417SDimitry Andric // The return block that is not included in the outlined region.
159044eb2f6SDimitry Andric BasicBlock *ReturnBlock = nullptr;
160044eb2f6SDimitry Andric
161d288ef4cSDimitry Andric // The dominating block of the region to be outlined.
162044eb2f6SDimitry Andric BasicBlock *NonReturnBlock = nullptr;
163044eb2f6SDimitry Andric
164b1c73532SDimitry Andric // The set of blocks in Entries that are predecessors to ReturnBlock
165a303c417SDimitry Andric SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166a303c417SDimitry Andric };
167a303c417SDimitry Andric
168044eb2f6SDimitry Andric struct FunctionOutliningMultiRegionInfo {
169145449b1SDimitry Andric FunctionOutliningMultiRegionInfo() = default;
170044eb2f6SDimitry Andric
171044eb2f6SDimitry Andric // Container for outline regions
172044eb2f6SDimitry Andric struct OutlineRegionInfo {
OutlineRegionInfo__anonb7b3c81f0111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo173e6d15924SDimitry Andric OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
174044eb2f6SDimitry Andric BasicBlock *EntryBlock, BasicBlock *ExitBlock,
175044eb2f6SDimitry Andric BasicBlock *ReturnBlock)
176e6d15924SDimitry Andric : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
177e6d15924SDimitry Andric ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
178044eb2f6SDimitry Andric SmallVector<BasicBlock *, 8> Region;
179044eb2f6SDimitry Andric BasicBlock *EntryBlock;
180044eb2f6SDimitry Andric BasicBlock *ExitBlock;
181044eb2f6SDimitry Andric BasicBlock *ReturnBlock;
182044eb2f6SDimitry Andric };
183044eb2f6SDimitry Andric
184044eb2f6SDimitry Andric SmallVector<OutlineRegionInfo, 4> ORI;
185044eb2f6SDimitry Andric };
186044eb2f6SDimitry Andric
187b915e9e0SDimitry Andric struct PartialInlinerImpl {
188044eb2f6SDimitry Andric
PartialInlinerImpl__anonb7b3c81f0111::PartialInlinerImpl189a303c417SDimitry Andric PartialInlinerImpl(
190cfca06d7SDimitry Andric function_ref<AssumptionCache &(Function &)> GetAC,
191e6d15924SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC,
192cfca06d7SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GTTI,
193cfca06d7SDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GTLI,
194cfca06d7SDimitry Andric ProfileSummaryInfo &ProfSI,
195cfca06d7SDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
196e6d15924SDimitry Andric : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
197cfca06d7SDimitry Andric GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
198044eb2f6SDimitry Andric
199b915e9e0SDimitry Andric bool run(Module &M);
200044eb2f6SDimitry Andric // Main part of the transformation that calls helper functions to find
201044eb2f6SDimitry Andric // outlining candidates, clone & outline the function, and attempt to
202044eb2f6SDimitry Andric // partially inline the resulting function. Returns true if
203044eb2f6SDimitry Andric // inlining was successful, false otherwise. Also returns the outline
204044eb2f6SDimitry Andric // function (only if we partially inlined early returns) as there is a
205044eb2f6SDimitry Andric // possibility to further "peel" early return statements that were left in the
206044eb2f6SDimitry Andric // outline function due to code size.
207b60736ecSDimitry Andric std::pair<bool, Function *> unswitchFunction(Function &F);
208b915e9e0SDimitry Andric
209eb11fae6SDimitry Andric // This class speculatively clones the function to be partial inlined.
2107c7aba6eSDimitry Andric // At the end of partial inlining, the remaining callsites to the cloned
2117c7aba6eSDimitry Andric // function that are not partially inlined will be fixed up to reference
2127c7aba6eSDimitry Andric // the original function, and the cloned function will be erased.
2137c7aba6eSDimitry Andric struct FunctionCloner {
214044eb2f6SDimitry Andric // Two constructors, one for single region outlining, the other for
215044eb2f6SDimitry Andric // multi-region outlining.
216044eb2f6SDimitry Andric FunctionCloner(Function *F, FunctionOutliningInfo *OI,
217e6d15924SDimitry Andric OptimizationRemarkEmitter &ORE,
218b60736ecSDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC,
219b60736ecSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI);
220044eb2f6SDimitry Andric FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
221e6d15924SDimitry Andric OptimizationRemarkEmitter &ORE,
222b60736ecSDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC,
223b60736ecSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI);
224b60736ecSDimitry Andric
2257c7aba6eSDimitry Andric ~FunctionCloner();
2267c7aba6eSDimitry Andric
2277c7aba6eSDimitry Andric // Prepare for function outlining: making sure there is only
2287c7aba6eSDimitry Andric // one incoming edge from the extracted/outlined region to
2297c7aba6eSDimitry Andric // the return block.
230b60736ecSDimitry Andric void normalizeReturnBlock() const;
2317c7aba6eSDimitry Andric
232044eb2f6SDimitry Andric // Do function outlining for cold regions.
233044eb2f6SDimitry Andric bool doMultiRegionFunctionOutlining();
234044eb2f6SDimitry Andric // Do function outlining for region after early return block(s).
235044eb2f6SDimitry Andric // NOTE: For vararg functions that do the vararg handling in the outlined
236044eb2f6SDimitry Andric // function, we temporarily generate IR that does not properly
237044eb2f6SDimitry Andric // forward varargs to the outlined function. Calling InlineFunction
238044eb2f6SDimitry Andric // will update calls to the outlined functions to properly forward
239044eb2f6SDimitry Andric // the varargs.
240044eb2f6SDimitry Andric Function *doSingleRegionFunctionOutlining();
2417c7aba6eSDimitry Andric
2427c7aba6eSDimitry Andric Function *OrigFunc = nullptr;
2437c7aba6eSDimitry Andric Function *ClonedFunc = nullptr;
244044eb2f6SDimitry Andric
245044eb2f6SDimitry Andric typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
246044eb2f6SDimitry Andric // Keep track of Outlined Functions and the basic block they're called from.
247044eb2f6SDimitry Andric SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
248044eb2f6SDimitry Andric
2497c7aba6eSDimitry Andric // ClonedFunc is inlined in one of its callers after function
2507c7aba6eSDimitry Andric // outlining.
2517c7aba6eSDimitry Andric bool IsFunctionInlined = false;
2527c7aba6eSDimitry Andric // The cost of the region to be outlined.
253344a3780SDimitry Andric InstructionCost OutlinedRegionCost = 0;
254044eb2f6SDimitry Andric // ClonedOI is specific to outlining non-early return blocks.
2557c7aba6eSDimitry Andric std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
256044eb2f6SDimitry Andric // ClonedOMRI is specific to outlining cold regions.
257044eb2f6SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
2587c7aba6eSDimitry Andric std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
259044eb2f6SDimitry Andric OptimizationRemarkEmitter &ORE;
260e6d15924SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC;
261b60736ecSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI;
2627c7aba6eSDimitry Andric };
2637c7aba6eSDimitry Andric
264a303c417SDimitry Andric private:
265a303c417SDimitry Andric int NumPartialInlining = 0;
266cfca06d7SDimitry Andric function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
267e6d15924SDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
268cfca06d7SDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI;
269cfca06d7SDimitry Andric function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
270cfca06d7SDimitry Andric function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
271cfca06d7SDimitry Andric ProfileSummaryInfo &PSI;
272a303c417SDimitry Andric
2736b3f41edSDimitry Andric // Return the frequency of the OutlininingBB relative to F's entry point.
2746b3f41edSDimitry Andric // The result is no larger than 1 and is represented using BP.
2756b3f41edSDimitry Andric // (Note that the outlined region's 'head' block can only have incoming
2766b3f41edSDimitry Andric // edges from the guarding entry blocks).
277b60736ecSDimitry Andric BranchProbability
278b60736ecSDimitry Andric getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
2796b3f41edSDimitry Andric
280cfca06d7SDimitry Andric // Return true if the callee of CB should be partially inlined with
2816b3f41edSDimitry Andric // profit.
282cfca06d7SDimitry Andric bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
283eb11fae6SDimitry Andric BlockFrequency WeightedOutliningRcost,
284b60736ecSDimitry Andric OptimizationRemarkEmitter &ORE) const;
2856b3f41edSDimitry Andric
2866b3f41edSDimitry Andric // Try to inline DuplicateFunction (cloned from F with call to
2876b3f41edSDimitry Andric // the OutlinedFunction into its callers. Return true
2886b3f41edSDimitry Andric // if there is any successful inlining.
2897c7aba6eSDimitry Andric bool tryPartialInline(FunctionCloner &Cloner);
2906b3f41edSDimitry Andric
2916b3f41edSDimitry Andric // Compute the mapping from use site of DuplicationFunction to the enclosing
2926b3f41edSDimitry Andric // BB's profile count.
293b60736ecSDimitry Andric void
294b60736ecSDimitry Andric computeCallsiteToProfCountMap(Function *DuplicateFunction,
295b60736ecSDimitry Andric DenseMap<User *, uint64_t> &SiteCountMap) const;
2966b3f41edSDimitry Andric
isLimitReached__anonb7b3c81f0111::PartialInlinerImpl297b60736ecSDimitry Andric bool isLimitReached() const {
29812f3ca4cSDimitry Andric return (MaxNumPartialInlining != -1 &&
29912f3ca4cSDimitry Andric NumPartialInlining >= MaxNumPartialInlining);
30012f3ca4cSDimitry Andric }
3016b3f41edSDimitry Andric
getSupportedCallBase__anonb7b3c81f0111::PartialInlinerImpl302cfca06d7SDimitry Andric static CallBase *getSupportedCallBase(User *U) {
303cfca06d7SDimitry Andric if (isa<CallInst>(U) || isa<InvokeInst>(U))
304cfca06d7SDimitry Andric return cast<CallBase>(U);
3056b3f41edSDimitry Andric llvm_unreachable("All uses must be calls");
306cfca06d7SDimitry Andric return nullptr;
3076b3f41edSDimitry Andric }
3086b3f41edSDimitry Andric
getOneCallSiteTo__anonb7b3c81f0111::PartialInlinerImpl309b60736ecSDimitry Andric static CallBase *getOneCallSiteTo(Function &F) {
310b60736ecSDimitry Andric User *User = *F.user_begin();
311cfca06d7SDimitry Andric return getSupportedCallBase(User);
3126b3f41edSDimitry Andric }
3136b3f41edSDimitry Andric
getOneDebugLoc__anonb7b3c81f0111::PartialInlinerImpl314b60736ecSDimitry Andric std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
315cfca06d7SDimitry Andric CallBase *CB = getOneCallSiteTo(F);
316cfca06d7SDimitry Andric DebugLoc DLoc = CB->getDebugLoc();
317cfca06d7SDimitry Andric BasicBlock *Block = CB->getParent();
3186b3f41edSDimitry Andric return std::make_tuple(DLoc, Block);
3196b3f41edSDimitry Andric }
3206b3f41edSDimitry Andric
3216b3f41edSDimitry Andric // Returns the costs associated with function outlining:
3226b3f41edSDimitry Andric // - The first value is the non-weighted runtime cost for making the call
3237c7aba6eSDimitry Andric // to the outlined function, including the addtional setup cost in the
3247c7aba6eSDimitry Andric // outlined function itself;
3256b3f41edSDimitry Andric // - The second value is the estimated size of the new call sequence in
3267c7aba6eSDimitry Andric // basic block Cloner.OutliningCallBB;
327344a3780SDimitry Andric std::tuple<InstructionCost, InstructionCost>
328344a3780SDimitry Andric computeOutliningCosts(FunctionCloner &Cloner) const;
329044eb2f6SDimitry Andric
3306b3f41edSDimitry Andric // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
3316b3f41edSDimitry Andric // approximate both the size and runtime cost (Note that in the current
3326b3f41edSDimitry Andric // inline cost analysis, there is no clear distinction there either).
333344a3780SDimitry Andric static InstructionCost computeBBInlineCost(BasicBlock *BB,
334344a3780SDimitry Andric TargetTransformInfo *TTI);
3356b3f41edSDimitry Andric
336b60736ecSDimitry Andric std::unique_ptr<FunctionOutliningInfo>
337b60736ecSDimitry Andric computeOutliningInfo(Function &F) const;
338b60736ecSDimitry Andric
339044eb2f6SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
340b60736ecSDimitry Andric computeOutliningColdRegionsInfo(Function &F,
341b60736ecSDimitry Andric OptimizationRemarkEmitter &ORE) const;
342b915e9e0SDimitry Andric };
343a303c417SDimitry Andric
344044eb2f6SDimitry Andric } // end anonymous namespace
345044eb2f6SDimitry Andric
346044eb2f6SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function & F,OptimizationRemarkEmitter & ORE) const347b60736ecSDimitry Andric PartialInlinerImpl::computeOutliningColdRegionsInfo(
348b60736ecSDimitry Andric Function &F, OptimizationRemarkEmitter &ORE) const {
349b60736ecSDimitry Andric BasicBlock *EntryBlock = &F.front();
350044eb2f6SDimitry Andric
351b60736ecSDimitry Andric DominatorTree DT(F);
352044eb2f6SDimitry Andric LoopInfo LI(DT);
353b60736ecSDimitry Andric BranchProbabilityInfo BPI(F, LI);
354044eb2f6SDimitry Andric std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
355044eb2f6SDimitry Andric BlockFrequencyInfo *BFI;
356044eb2f6SDimitry Andric if (!GetBFI) {
357b60736ecSDimitry Andric ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
358044eb2f6SDimitry Andric BFI = ScopedBFI.get();
359044eb2f6SDimitry Andric } else
360b60736ecSDimitry Andric BFI = &(GetBFI(F));
361044eb2f6SDimitry Andric
362044eb2f6SDimitry Andric // Return if we don't have profiling information.
363cfca06d7SDimitry Andric if (!PSI.hasInstrumentationProfile())
364044eb2f6SDimitry Andric return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
365044eb2f6SDimitry Andric
366044eb2f6SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
3671d5ae102SDimitry Andric std::make_unique<FunctionOutliningMultiRegionInfo>();
368044eb2f6SDimitry Andric
369044eb2f6SDimitry Andric auto IsSingleExit =
370044eb2f6SDimitry Andric [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
371044eb2f6SDimitry Andric BasicBlock *ExitBlock = nullptr;
372044eb2f6SDimitry Andric for (auto *Block : BlockList) {
373344a3780SDimitry Andric for (BasicBlock *Succ : successors(Block)) {
374344a3780SDimitry Andric if (!is_contained(BlockList, Succ)) {
375044eb2f6SDimitry Andric if (ExitBlock) {
376044eb2f6SDimitry Andric ORE.emit([&]() {
377044eb2f6SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
378344a3780SDimitry Andric &Succ->front())
379044eb2f6SDimitry Andric << "Region dominated by "
380044eb2f6SDimitry Andric << ore::NV("Block", BlockList.front()->getName())
381044eb2f6SDimitry Andric << " has more than one region exit edge.";
382044eb2f6SDimitry Andric });
383044eb2f6SDimitry Andric return nullptr;
384b60736ecSDimitry Andric }
385b60736ecSDimitry Andric
386044eb2f6SDimitry Andric ExitBlock = Block;
387044eb2f6SDimitry Andric }
388044eb2f6SDimitry Andric }
389044eb2f6SDimitry Andric }
390044eb2f6SDimitry Andric return ExitBlock;
391044eb2f6SDimitry Andric };
392044eb2f6SDimitry Andric
393044eb2f6SDimitry Andric auto BBProfileCount = [BFI](BasicBlock *BB) {
394145449b1SDimitry Andric return BFI->getBlockProfileCount(BB).value_or(0);
395044eb2f6SDimitry Andric };
396044eb2f6SDimitry Andric
397044eb2f6SDimitry Andric // Use the same computeBBInlineCost function to compute the cost savings of
398044eb2f6SDimitry Andric // the outlining the candidate region.
399b60736ecSDimitry Andric TargetTransformInfo *FTTI = &GetTTI(F);
400344a3780SDimitry Andric InstructionCost OverallFunctionCost = 0;
401b60736ecSDimitry Andric for (auto &BB : F)
402b60736ecSDimitry Andric OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
403044eb2f6SDimitry Andric
404b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
405b60736ecSDimitry Andric << "\n";);
406b60736ecSDimitry Andric
407344a3780SDimitry Andric InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
408344a3780SDimitry Andric [&](auto Cost) { return Cost * MinRegionSizeRatio; });
409344a3780SDimitry Andric
410044eb2f6SDimitry Andric BranchProbability MinBranchProbability(
411044eb2f6SDimitry Andric static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
412044eb2f6SDimitry Andric MinBlockCounterExecution);
413044eb2f6SDimitry Andric bool ColdCandidateFound = false;
414044eb2f6SDimitry Andric BasicBlock *CurrEntry = EntryBlock;
415044eb2f6SDimitry Andric std::vector<BasicBlock *> DFS;
416044eb2f6SDimitry Andric DenseMap<BasicBlock *, bool> VisitedMap;
417044eb2f6SDimitry Andric DFS.push_back(CurrEntry);
418044eb2f6SDimitry Andric VisitedMap[CurrEntry] = true;
419b60736ecSDimitry Andric
420044eb2f6SDimitry Andric // Use Depth First Search on the basic blocks to find CFG edges that are
421044eb2f6SDimitry Andric // considered cold.
422044eb2f6SDimitry Andric // Cold regions considered must also have its inline cost compared to the
423044eb2f6SDimitry Andric // overall inline cost of the original function. The region is outlined only
424044eb2f6SDimitry Andric // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
425044eb2f6SDimitry Andric // more.
426044eb2f6SDimitry Andric while (!DFS.empty()) {
427b60736ecSDimitry Andric auto *ThisBB = DFS.back();
428044eb2f6SDimitry Andric DFS.pop_back();
429044eb2f6SDimitry Andric // Only consider regions with predecessor blocks that are considered
430044eb2f6SDimitry Andric // not-cold (default: part of the top 99.99% of all block counters)
431044eb2f6SDimitry Andric // AND greater than our minimum block execution count (default: 100).
432b60736ecSDimitry Andric if (PSI.isColdBlock(ThisBB, BFI) ||
433b60736ecSDimitry Andric BBProfileCount(ThisBB) < MinBlockCounterExecution)
434044eb2f6SDimitry Andric continue;
435b60736ecSDimitry Andric for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
436044eb2f6SDimitry Andric if (VisitedMap[*SI])
437044eb2f6SDimitry Andric continue;
438044eb2f6SDimitry Andric VisitedMap[*SI] = true;
439044eb2f6SDimitry Andric DFS.push_back(*SI);
440044eb2f6SDimitry Andric // If branch isn't cold, we skip to the next one.
441b60736ecSDimitry Andric BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
442044eb2f6SDimitry Andric if (SuccProb > MinBranchProbability)
443044eb2f6SDimitry Andric continue;
444b60736ecSDimitry Andric
445b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
446b60736ecSDimitry Andric << SI->getName()
447b60736ecSDimitry Andric << "\nBranch Probability = " << SuccProb << "\n";);
448b60736ecSDimitry Andric
449044eb2f6SDimitry Andric SmallVector<BasicBlock *, 8> DominateVector;
450044eb2f6SDimitry Andric DT.getDescendants(*SI, DominateVector);
451b60736ecSDimitry Andric assert(!DominateVector.empty() &&
452b60736ecSDimitry Andric "SI should be reachable and have at least itself as descendant");
453b60736ecSDimitry Andric
454044eb2f6SDimitry Andric // We can only outline single entry regions (for now).
455b60736ecSDimitry Andric if (!DominateVector.front()->hasNPredecessors(1)) {
456b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
457b60736ecSDimitry Andric << " doesn't have a single predecessor in the "
458b60736ecSDimitry Andric "dominator tree\n";);
459044eb2f6SDimitry Andric continue;
460b60736ecSDimitry Andric }
461b60736ecSDimitry Andric
462044eb2f6SDimitry Andric BasicBlock *ExitBlock = nullptr;
463044eb2f6SDimitry Andric // We can only outline single exit regions (for now).
464b60736ecSDimitry Andric if (!(ExitBlock = IsSingleExit(DominateVector))) {
465b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
466b60736ecSDimitry Andric << " doesn't have a unique successor\n";);
467044eb2f6SDimitry Andric continue;
468b60736ecSDimitry Andric }
469b60736ecSDimitry Andric
470344a3780SDimitry Andric InstructionCost OutlineRegionCost = 0;
471044eb2f6SDimitry Andric for (auto *BB : DominateVector)
472b60736ecSDimitry Andric OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
473044eb2f6SDimitry Andric
474b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
475b60736ecSDimitry Andric << "\n";);
476044eb2f6SDimitry Andric
477b60736ecSDimitry Andric if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
478044eb2f6SDimitry Andric ORE.emit([&]() {
479044eb2f6SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
480044eb2f6SDimitry Andric &SI->front())
481b60736ecSDimitry Andric << ore::NV("Callee", &F)
482b60736ecSDimitry Andric << " inline cost-savings smaller than "
483044eb2f6SDimitry Andric << ore::NV("Cost", MinOutlineRegionCost);
484044eb2f6SDimitry Andric });
485b60736ecSDimitry Andric
486b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
487b60736ecSDimitry Andric << MinOutlineRegionCost << "\n";);
488044eb2f6SDimitry Andric continue;
489044eb2f6SDimitry Andric }
490b60736ecSDimitry Andric
491044eb2f6SDimitry Andric // For now, ignore blocks that belong to a SISE region that is a
492044eb2f6SDimitry Andric // candidate for outlining. In the future, we may want to look
493044eb2f6SDimitry Andric // at inner regions because the outer region may have live-exit
494044eb2f6SDimitry Andric // variables.
495044eb2f6SDimitry Andric for (auto *BB : DominateVector)
496044eb2f6SDimitry Andric VisitedMap[BB] = true;
497b60736ecSDimitry Andric
498044eb2f6SDimitry Andric // ReturnBlock here means the block after the outline call
499044eb2f6SDimitry Andric BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
500044eb2f6SDimitry Andric FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
501044eb2f6SDimitry Andric DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
502044eb2f6SDimitry Andric OutliningInfo->ORI.push_back(RegInfo);
503b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
504b60736ecSDimitry Andric << DominateVector.front()->getName() << "\n";);
505044eb2f6SDimitry Andric ColdCandidateFound = true;
506044eb2f6SDimitry Andric NumColdRegionsFound++;
507044eb2f6SDimitry Andric }
508044eb2f6SDimitry Andric }
509b60736ecSDimitry Andric
510044eb2f6SDimitry Andric if (ColdCandidateFound)
511044eb2f6SDimitry Andric return OutliningInfo;
512b60736ecSDimitry Andric
513044eb2f6SDimitry Andric return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
5141a82d4c0SDimitry Andric }
515600c6fa1SEd Schouten
516a303c417SDimitry Andric std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function & F) const517b60736ecSDimitry Andric PartialInlinerImpl::computeOutliningInfo(Function &F) const {
518b60736ecSDimitry Andric BasicBlock *EntryBlock = &F.front();
519b915e9e0SDimitry Andric BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
52059850d08SRoman Divacky if (!BR || BR->isUnconditional())
521a303c417SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>();
522600c6fa1SEd Schouten
523a303c417SDimitry Andric // Returns true if Succ is BB's successor
524a303c417SDimitry Andric auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
525a303c417SDimitry Andric return is_contained(successors(BB), Succ);
526a303c417SDimitry Andric };
527a303c417SDimitry Andric
528a303c417SDimitry Andric auto IsReturnBlock = [](BasicBlock *BB) {
529d8e91e46SDimitry Andric Instruction *TI = BB->getTerminator();
530a303c417SDimitry Andric return isa<ReturnInst>(TI);
531a303c417SDimitry Andric };
532a303c417SDimitry Andric
5336b3f41edSDimitry Andric auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
534a303c417SDimitry Andric if (IsReturnBlock(Succ1))
535a303c417SDimitry Andric return std::make_tuple(Succ1, Succ2);
536a303c417SDimitry Andric if (IsReturnBlock(Succ2))
537a303c417SDimitry Andric return std::make_tuple(Succ2, Succ1);
538a303c417SDimitry Andric
539a303c417SDimitry Andric return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
540a303c417SDimitry Andric };
541a303c417SDimitry Andric
542a303c417SDimitry Andric // Detect a triangular shape:
5436b3f41edSDimitry Andric auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
544a303c417SDimitry Andric if (IsSuccessor(Succ1, Succ2))
545a303c417SDimitry Andric return std::make_tuple(Succ1, Succ2);
546a303c417SDimitry Andric if (IsSuccessor(Succ2, Succ1))
547a303c417SDimitry Andric return std::make_tuple(Succ2, Succ1);
548a303c417SDimitry Andric
549a303c417SDimitry Andric return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
550a303c417SDimitry Andric };
551a303c417SDimitry Andric
552a303c417SDimitry Andric std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
5531d5ae102SDimitry Andric std::make_unique<FunctionOutliningInfo>();
554a303c417SDimitry Andric
555a303c417SDimitry Andric BasicBlock *CurrEntry = EntryBlock;
556a303c417SDimitry Andric bool CandidateFound = false;
557a303c417SDimitry Andric do {
558a303c417SDimitry Andric // The number of blocks to be inlined has already reached
559a303c417SDimitry Andric // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
560a303c417SDimitry Andric // disables partial inlining for the function.
561b60736ecSDimitry Andric if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
562a303c417SDimitry Andric break;
563a303c417SDimitry Andric
564eb11fae6SDimitry Andric if (succ_size(CurrEntry) != 2)
565a303c417SDimitry Andric break;
566a303c417SDimitry Andric
567a303c417SDimitry Andric BasicBlock *Succ1 = *succ_begin(CurrEntry);
568a303c417SDimitry Andric BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
569a303c417SDimitry Andric
570a303c417SDimitry Andric BasicBlock *ReturnBlock, *NonReturnBlock;
571a303c417SDimitry Andric std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
572a303c417SDimitry Andric
573a303c417SDimitry Andric if (ReturnBlock) {
574a303c417SDimitry Andric OutliningInfo->Entries.push_back(CurrEntry);
575a303c417SDimitry Andric OutliningInfo->ReturnBlock = ReturnBlock;
576a303c417SDimitry Andric OutliningInfo->NonReturnBlock = NonReturnBlock;
577a303c417SDimitry Andric CandidateFound = true;
578a303c417SDimitry Andric break;
5795a5ac124SDimitry Andric }
580600c6fa1SEd Schouten
581b60736ecSDimitry Andric BasicBlock *CommSucc, *OtherSucc;
582a303c417SDimitry Andric std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
583a303c417SDimitry Andric
584a303c417SDimitry Andric if (!CommSucc)
585a303c417SDimitry Andric break;
586a303c417SDimitry Andric
587a303c417SDimitry Andric OutliningInfo->Entries.push_back(CurrEntry);
588a303c417SDimitry Andric CurrEntry = OtherSucc;
589a303c417SDimitry Andric } while (true);
590a303c417SDimitry Andric
591a303c417SDimitry Andric if (!CandidateFound)
592a303c417SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>();
593a303c417SDimitry Andric
594f65dcba8SDimitry Andric // There should not be any successors (not in the entry set) other than
595a303c417SDimitry Andric // {ReturnBlock, NonReturnBlock}
596b60736ecSDimitry Andric assert(OutliningInfo->Entries[0] == &F.front() &&
5976b3f41edSDimitry Andric "Function Entry must be the first in Entries vector");
598a303c417SDimitry Andric DenseSet<BasicBlock *> Entries;
599a303c417SDimitry Andric for (BasicBlock *E : OutliningInfo->Entries)
600a303c417SDimitry Andric Entries.insert(E);
601a303c417SDimitry Andric
602a303c417SDimitry Andric // Returns true of BB has Predecessor which is not
603a303c417SDimitry Andric // in Entries set.
604a303c417SDimitry Andric auto HasNonEntryPred = [Entries](BasicBlock *BB) {
605b60736ecSDimitry Andric for (auto *Pred : predecessors(BB)) {
606a303c417SDimitry Andric if (!Entries.count(Pred))
607a303c417SDimitry Andric return true;
608a303c417SDimitry Andric }
609a303c417SDimitry Andric return false;
610a303c417SDimitry Andric };
611a303c417SDimitry Andric auto CheckAndNormalizeCandidate =
612a303c417SDimitry Andric [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
613a303c417SDimitry Andric for (BasicBlock *E : OutliningInfo->Entries) {
614b60736ecSDimitry Andric for (auto *Succ : successors(E)) {
615a303c417SDimitry Andric if (Entries.count(Succ))
616a303c417SDimitry Andric continue;
617a303c417SDimitry Andric if (Succ == OutliningInfo->ReturnBlock)
618a303c417SDimitry Andric OutliningInfo->ReturnBlockPreds.push_back(E);
619a303c417SDimitry Andric else if (Succ != OutliningInfo->NonReturnBlock)
620a303c417SDimitry Andric return false;
621a303c417SDimitry Andric }
622a303c417SDimitry Andric // There should not be any outside incoming edges either:
623a303c417SDimitry Andric if (HasNonEntryPred(E))
624a303c417SDimitry Andric return false;
625a303c417SDimitry Andric }
626a303c417SDimitry Andric return true;
627a303c417SDimitry Andric };
628a303c417SDimitry Andric
629a303c417SDimitry Andric if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
630a303c417SDimitry Andric return std::unique_ptr<FunctionOutliningInfo>();
631a303c417SDimitry Andric
632a303c417SDimitry Andric // Now further growing the candidate's inlining region by
633a303c417SDimitry Andric // peeling off dominating blocks from the outlining region:
634b60736ecSDimitry Andric while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
635a303c417SDimitry Andric BasicBlock *Cand = OutliningInfo->NonReturnBlock;
636eb11fae6SDimitry Andric if (succ_size(Cand) != 2)
637a303c417SDimitry Andric break;
638a303c417SDimitry Andric
639a303c417SDimitry Andric if (HasNonEntryPred(Cand))
640a303c417SDimitry Andric break;
641a303c417SDimitry Andric
642a303c417SDimitry Andric BasicBlock *Succ1 = *succ_begin(Cand);
643a303c417SDimitry Andric BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
644a303c417SDimitry Andric
645a303c417SDimitry Andric BasicBlock *ReturnBlock, *NonReturnBlock;
646a303c417SDimitry Andric std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
647a303c417SDimitry Andric if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
648a303c417SDimitry Andric break;
649a303c417SDimitry Andric
650a303c417SDimitry Andric if (NonReturnBlock->getSinglePredecessor() != Cand)
651a303c417SDimitry Andric break;
652a303c417SDimitry Andric
653a303c417SDimitry Andric // Now grow and update OutlininigInfo:
654a303c417SDimitry Andric OutliningInfo->Entries.push_back(Cand);
655a303c417SDimitry Andric OutliningInfo->NonReturnBlock = NonReturnBlock;
656a303c417SDimitry Andric OutliningInfo->ReturnBlockPreds.push_back(Cand);
657a303c417SDimitry Andric Entries.insert(Cand);
658a303c417SDimitry Andric }
659a303c417SDimitry Andric
660a303c417SDimitry Andric return OutliningInfo;
661a303c417SDimitry Andric }
662a303c417SDimitry Andric
663706b4fc4SDimitry Andric // Check if there is PGO data or user annotated branch data:
hasProfileData(const Function & F,const FunctionOutliningInfo & OI)664b60736ecSDimitry Andric static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
665b60736ecSDimitry Andric if (F.hasProfileData())
6666b3f41edSDimitry Andric return true;
6676b3f41edSDimitry Andric // Now check if any of the entry block has MD_prof data:
668b60736ecSDimitry Andric for (auto *E : OI.Entries) {
6696b3f41edSDimitry Andric BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
6706b3f41edSDimitry Andric if (!BR || BR->isUnconditional())
6716b3f41edSDimitry Andric continue;
672e3b55780SDimitry Andric if (hasBranchWeightMD(*BR))
6736b3f41edSDimitry Andric return true;
6746b3f41edSDimitry Andric }
6756b3f41edSDimitry Andric return false;
6766b3f41edSDimitry Andric }
6776b3f41edSDimitry Andric
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner) const678b60736ecSDimitry Andric BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
679b60736ecSDimitry Andric FunctionCloner &Cloner) const {
680044eb2f6SDimitry Andric BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
6816b3f41edSDimitry Andric auto EntryFreq =
6827c7aba6eSDimitry Andric Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
6837c7aba6eSDimitry Andric auto OutliningCallFreq =
684044eb2f6SDimitry Andric Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
685044eb2f6SDimitry Andric // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
686044eb2f6SDimitry Andric // we outlined any regions, so we may encounter situations where the
687044eb2f6SDimitry Andric // OutliningCallFreq is *slightly* bigger than the EntryFreq.
688b60736ecSDimitry Andric if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
689044eb2f6SDimitry Andric OutliningCallFreq = EntryFreq;
690b60736ecSDimitry Andric
691044eb2f6SDimitry Andric auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
692044eb2f6SDimitry Andric OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
6936b3f41edSDimitry Andric
694145449b1SDimitry Andric if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
6956b3f41edSDimitry Andric return OutlineRegionRelFreq;
6966b3f41edSDimitry Andric
697d288ef4cSDimitry Andric // When profile data is not available, we need to be conservative in
698d288ef4cSDimitry Andric // estimating the overall savings. Static branch prediction can usually
699d288ef4cSDimitry Andric // guess the branch direction right (taken/non-taken), but the guessed
700d288ef4cSDimitry Andric // branch probability is usually not biased enough. In case when the
701d288ef4cSDimitry Andric // outlined region is predicted to be likely, its probability needs
702d288ef4cSDimitry Andric // to be made higher (more biased) to not under-estimate the cost of
703d288ef4cSDimitry Andric // function outlining. On the other hand, if the outlined region
704d288ef4cSDimitry Andric // is predicted to be less likely, the predicted probablity is usually
705d288ef4cSDimitry Andric // higher than the actual. For instance, the actual probability of the
706d288ef4cSDimitry Andric // less likely target is only 5%, but the guessed probablity can be
707e3b55780SDimitry Andric // 40%. In the latter case, there is no need for further adjustment.
708d288ef4cSDimitry Andric // FIXME: add an option for this.
709d288ef4cSDimitry Andric if (OutlineRegionRelFreq < BranchProbability(45, 100))
710d288ef4cSDimitry Andric return OutlineRegionRelFreq;
711d288ef4cSDimitry Andric
712d288ef4cSDimitry Andric OutlineRegionRelFreq = std::max(
713d288ef4cSDimitry Andric OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
7146b3f41edSDimitry Andric
7156b3f41edSDimitry Andric return OutlineRegionRelFreq;
7166b3f41edSDimitry Andric }
7176b3f41edSDimitry Andric
shouldPartialInline(CallBase & CB,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE) const7186b3f41edSDimitry Andric bool PartialInlinerImpl::shouldPartialInline(
719cfca06d7SDimitry Andric CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
720b60736ecSDimitry Andric OptimizationRemarkEmitter &ORE) const {
721a303c417SDimitry Andric using namespace ore;
722044eb2f6SDimitry Andric
723cfca06d7SDimitry Andric Function *Callee = CB.getCalledFunction();
7247c7aba6eSDimitry Andric assert(Callee == Cloner.ClonedFunc);
7257c7aba6eSDimitry Andric
726eb11fae6SDimitry Andric if (SkipCostAnalysis)
727cfca06d7SDimitry Andric return isInlineViable(*Callee).isSuccess();
728eb11fae6SDimitry Andric
729cfca06d7SDimitry Andric Function *Caller = CB.getCaller();
730cfca06d7SDimitry Andric auto &CalleeTTI = GetTTI(*Callee);
731e6d15924SDimitry Andric bool RemarksEnabled =
732e6d15924SDimitry Andric Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
733e6d15924SDimitry Andric DEBUG_TYPE);
734cfca06d7SDimitry Andric InlineCost IC =
735cfca06d7SDimitry Andric getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
736cfca06d7SDimitry Andric GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
737a303c417SDimitry Andric
738a303c417SDimitry Andric if (IC.isAlways()) {
739044eb2f6SDimitry Andric ORE.emit([&]() {
740cfca06d7SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
7417c7aba6eSDimitry Andric << NV("Callee", Cloner.OrigFunc)
742044eb2f6SDimitry Andric << " should always be fully inlined, not partially";
743044eb2f6SDimitry Andric });
744a303c417SDimitry Andric return false;
745a303c417SDimitry Andric }
746a303c417SDimitry Andric
747a303c417SDimitry Andric if (IC.isNever()) {
748044eb2f6SDimitry Andric ORE.emit([&]() {
749cfca06d7SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
7507c7aba6eSDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
751a303c417SDimitry Andric << NV("Caller", Caller)
752044eb2f6SDimitry Andric << " because it should never be inlined (cost=never)";
753044eb2f6SDimitry Andric });
754a303c417SDimitry Andric return false;
755a303c417SDimitry Andric }
756a303c417SDimitry Andric
757a303c417SDimitry Andric if (!IC) {
758044eb2f6SDimitry Andric ORE.emit([&]() {
759cfca06d7SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
7607c7aba6eSDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
761a303c417SDimitry Andric << NV("Caller", Caller) << " because too costly to inline (cost="
762a303c417SDimitry Andric << NV("Cost", IC.getCost()) << ", threshold="
763044eb2f6SDimitry Andric << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
764044eb2f6SDimitry Andric });
765a303c417SDimitry Andric return false;
766a303c417SDimitry Andric }
767ac9a064cSDimitry Andric const DataLayout &DL = Caller->getDataLayout();
7687c7aba6eSDimitry Andric
7696b3f41edSDimitry Andric // The savings of eliminating the call:
770b1c73532SDimitry Andric int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
7716b3f41edSDimitry Andric BlockFrequency NormWeightedSavings(NonWeightedSavings);
7726b3f41edSDimitry Andric
7736b3f41edSDimitry Andric // Weighted saving is smaller than weighted cost, return false
7747c7aba6eSDimitry Andric if (NormWeightedSavings < WeightedOutliningRcost) {
775044eb2f6SDimitry Andric ORE.emit([&]() {
776044eb2f6SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
777cfca06d7SDimitry Andric &CB)
7787c7aba6eSDimitry Andric << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
7796b3f41edSDimitry Andric << NV("Caller", Caller) << " runtime overhead (overhead="
7807c7aba6eSDimitry Andric << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
7816b3f41edSDimitry Andric << ", savings="
782044eb2f6SDimitry Andric << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
783044eb2f6SDimitry Andric << ")"
784044eb2f6SDimitry Andric << " of making the outlined call is too high";
785044eb2f6SDimitry Andric });
7866b3f41edSDimitry Andric
7876b3f41edSDimitry Andric return false;
7886b3f41edSDimitry Andric }
789a303c417SDimitry Andric
790044eb2f6SDimitry Andric ORE.emit([&]() {
791cfca06d7SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
7927c7aba6eSDimitry Andric << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
793a303c417SDimitry Andric << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
794a303c417SDimitry Andric << " (threshold="
795044eb2f6SDimitry Andric << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
796044eb2f6SDimitry Andric });
797a303c417SDimitry Andric return true;
798a303c417SDimitry Andric }
799a303c417SDimitry Andric
8006b3f41edSDimitry Andric // TODO: Ideally we should share Inliner's InlineCost Analysis code.
8016b3f41edSDimitry Andric // For now use a simplified version. The returned 'InlineCost' will be used
8026b3f41edSDimitry Andric // to esimate the size cost as well as runtime cost of the BB.
803344a3780SDimitry Andric InstructionCost
computeBBInlineCost(BasicBlock * BB,TargetTransformInfo * TTI)804344a3780SDimitry Andric PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
805b60736ecSDimitry Andric TargetTransformInfo *TTI) {
806344a3780SDimitry Andric InstructionCost InlineCost = 0;
807ac9a064cSDimitry Andric const DataLayout &DL = BB->getDataLayout();
808e3b55780SDimitry Andric int InstrCost = InlineConstants::getInstrCost();
809d8e91e46SDimitry Andric for (Instruction &I : BB->instructionsWithoutDebug()) {
810d8e91e46SDimitry Andric // Skip free instructions.
811d8e91e46SDimitry Andric switch (I.getOpcode()) {
812d288ef4cSDimitry Andric case Instruction::BitCast:
813d288ef4cSDimitry Andric case Instruction::PtrToInt:
814d288ef4cSDimitry Andric case Instruction::IntToPtr:
815d288ef4cSDimitry Andric case Instruction::Alloca:
816d8e91e46SDimitry Andric case Instruction::PHI:
817d288ef4cSDimitry Andric continue;
818d288ef4cSDimitry Andric case Instruction::GetElementPtr:
819d8e91e46SDimitry Andric if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
820d288ef4cSDimitry Andric continue;
821c7dac04cSDimitry Andric break;
822d288ef4cSDimitry Andric default:
823d288ef4cSDimitry Andric break;
824d288ef4cSDimitry Andric }
825d288ef4cSDimitry Andric
826d8e91e46SDimitry Andric if (I.isLifetimeStartOrEnd())
827d288ef4cSDimitry Andric continue;
828d288ef4cSDimitry Andric
829b60736ecSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
830b60736ecSDimitry Andric Intrinsic::ID IID = II->getIntrinsicID();
831b60736ecSDimitry Andric SmallVector<Type *, 4> Tys;
832b60736ecSDimitry Andric FastMathFlags FMF;
833b60736ecSDimitry Andric for (Value *Val : II->args())
834b60736ecSDimitry Andric Tys.push_back(Val->getType());
835b60736ecSDimitry Andric
836b60736ecSDimitry Andric if (auto *FPMO = dyn_cast<FPMathOperator>(II))
837b60736ecSDimitry Andric FMF = FPMO->getFastMathFlags();
838b60736ecSDimitry Andric
839b60736ecSDimitry Andric IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
840b60736ecSDimitry Andric InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
841b60736ecSDimitry Andric continue;
842b60736ecSDimitry Andric }
843b60736ecSDimitry Andric
844d8e91e46SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) {
845b1c73532SDimitry Andric InlineCost += getCallsiteCost(*TTI, *CI, DL);
8466b3f41edSDimitry Andric continue;
8476b3f41edSDimitry Andric }
8486b3f41edSDimitry Andric
849d8e91e46SDimitry Andric if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
850b1c73532SDimitry Andric InlineCost += getCallsiteCost(*TTI, *II, DL);
8516b3f41edSDimitry Andric continue;
8526b3f41edSDimitry Andric }
8536b3f41edSDimitry Andric
854d8e91e46SDimitry Andric if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
855e3b55780SDimitry Andric InlineCost += (SI->getNumCases() + 1) * InstrCost;
8566b3f41edSDimitry Andric continue;
8576b3f41edSDimitry Andric }
858e3b55780SDimitry Andric InlineCost += InstrCost;
8596b3f41edSDimitry Andric }
860344a3780SDimitry Andric
8616b3f41edSDimitry Andric return InlineCost;
8626b3f41edSDimitry Andric }
8636b3f41edSDimitry Andric
864344a3780SDimitry Andric std::tuple<InstructionCost, InstructionCost>
computeOutliningCosts(FunctionCloner & Cloner) const865b60736ecSDimitry Andric PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
866344a3780SDimitry Andric InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
867044eb2f6SDimitry Andric for (auto FuncBBPair : Cloner.OutlinedFunctions) {
868044eb2f6SDimitry Andric Function *OutlinedFunc = FuncBBPair.first;
869044eb2f6SDimitry Andric BasicBlock* OutliningCallBB = FuncBBPair.second;
8706b3f41edSDimitry Andric // Now compute the cost of the call sequence to the outlined function
8716b3f41edSDimitry Andric // 'OutlinedFunction' in BB 'OutliningCallBB':
872b60736ecSDimitry Andric auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
873b60736ecSDimitry Andric OutliningFuncCallCost +=
874b60736ecSDimitry Andric computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
8756b3f41edSDimitry Andric
8766b3f41edSDimitry Andric // Now compute the cost of the extracted/outlined function itself:
877044eb2f6SDimitry Andric for (BasicBlock &BB : *OutlinedFunc)
878b60736ecSDimitry Andric OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
8796b3f41edSDimitry Andric }
8807c7aba6eSDimitry Andric assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
8816b3f41edSDimitry Andric "Outlined function cost should be no less than the outlined region");
882044eb2f6SDimitry Andric
883d288ef4cSDimitry Andric // The code extractor introduces a new root and exit stub blocks with
884d288ef4cSDimitry Andric // additional unconditional branches. Those branches will be eliminated
885d288ef4cSDimitry Andric // later with bb layout. The cost should be adjusted accordingly:
886044eb2f6SDimitry Andric OutlinedFunctionCost -=
887e3b55780SDimitry Andric 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
888d288ef4cSDimitry Andric
889344a3780SDimitry Andric InstructionCost OutliningRuntimeOverhead =
8907c7aba6eSDimitry Andric OutliningFuncCallCost +
8917c7aba6eSDimitry Andric (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
892344a3780SDimitry Andric ExtraOutliningPenalty.getValue();
8936b3f41edSDimitry Andric
8947c7aba6eSDimitry Andric return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
8956b3f41edSDimitry Andric }
8966b3f41edSDimitry Andric
8976b3f41edSDimitry Andric // Create the callsite to profile count map which is
8986b3f41edSDimitry Andric // used to update the original function's entry count,
8996b3f41edSDimitry Andric // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap) const9006b3f41edSDimitry Andric void PartialInlinerImpl::computeCallsiteToProfCountMap(
9016b3f41edSDimitry Andric Function *DuplicateFunction,
902b60736ecSDimitry Andric DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
9036b3f41edSDimitry Andric std::vector<User *> Users(DuplicateFunction->user_begin(),
9046b3f41edSDimitry Andric DuplicateFunction->user_end());
9056b3f41edSDimitry Andric Function *CurrentCaller = nullptr;
906ab44ce3dSDimitry Andric std::unique_ptr<BlockFrequencyInfo> TempBFI;
9076b3f41edSDimitry Andric BlockFrequencyInfo *CurrentCallerBFI = nullptr;
9086b3f41edSDimitry Andric
9096b3f41edSDimitry Andric auto ComputeCurrBFI = [&,this](Function *Caller) {
9106b3f41edSDimitry Andric // For the old pass manager:
9116b3f41edSDimitry Andric if (!GetBFI) {
9126b3f41edSDimitry Andric DominatorTree DT(*Caller);
9136b3f41edSDimitry Andric LoopInfo LI(DT);
9146b3f41edSDimitry Andric BranchProbabilityInfo BPI(*Caller, LI);
915ab44ce3dSDimitry Andric TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
916ab44ce3dSDimitry Andric CurrentCallerBFI = TempBFI.get();
9176b3f41edSDimitry Andric } else {
9186b3f41edSDimitry Andric // New pass manager:
919cfca06d7SDimitry Andric CurrentCallerBFI = &(GetBFI(*Caller));
9206b3f41edSDimitry Andric }
9216b3f41edSDimitry Andric };
9226b3f41edSDimitry Andric
9236b3f41edSDimitry Andric for (User *User : Users) {
9246f8fc217SDimitry Andric // Don't bother with BlockAddress used by CallBr for asm goto.
9256f8fc217SDimitry Andric if (isa<BlockAddress>(User))
9266f8fc217SDimitry Andric continue;
927cfca06d7SDimitry Andric CallBase *CB = getSupportedCallBase(User);
928cfca06d7SDimitry Andric Function *Caller = CB->getCaller();
9296b3f41edSDimitry Andric if (CurrentCaller != Caller) {
9306b3f41edSDimitry Andric CurrentCaller = Caller;
9316b3f41edSDimitry Andric ComputeCurrBFI(Caller);
9326b3f41edSDimitry Andric } else {
9336b3f41edSDimitry Andric assert(CurrentCallerBFI && "CallerBFI is not set");
9346b3f41edSDimitry Andric }
935cfca06d7SDimitry Andric BasicBlock *CallBB = CB->getParent();
9366b3f41edSDimitry Andric auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
9376b3f41edSDimitry Andric if (Count)
9386b3f41edSDimitry Andric CallSiteToProfCountMap[User] = *Count;
9396b3f41edSDimitry Andric else
9406b3f41edSDimitry Andric CallSiteToProfCountMap[User] = 0;
9416b3f41edSDimitry Andric }
9426b3f41edSDimitry Andric }
9436b3f41edSDimitry Andric
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)944044eb2f6SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
945e6d15924SDimitry Andric Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
946b60736ecSDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC,
947b60736ecSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI)
948b60736ecSDimitry Andric : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
9491d5ae102SDimitry Andric ClonedOI = std::make_unique<FunctionOutliningInfo>();
9507c7aba6eSDimitry Andric
9517c7aba6eSDimitry Andric // Clone the function, so that we can hack away on it.
9527c7aba6eSDimitry Andric ValueToValueMapTy VMap;
9537c7aba6eSDimitry Andric ClonedFunc = CloneFunction(F, VMap);
9547c7aba6eSDimitry Andric
9557c7aba6eSDimitry Andric ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
9567c7aba6eSDimitry Andric ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
957b60736ecSDimitry Andric for (BasicBlock *BB : OI->Entries)
9587c7aba6eSDimitry Andric ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
959b60736ecSDimitry Andric
9607c7aba6eSDimitry Andric for (BasicBlock *E : OI->ReturnBlockPreds) {
9617c7aba6eSDimitry Andric BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
9627c7aba6eSDimitry Andric ClonedOI->ReturnBlockPreds.push_back(NewE);
9637c7aba6eSDimitry Andric }
9647c7aba6eSDimitry Andric // Go ahead and update all uses to the duplicate, so that we can just
9657c7aba6eSDimitry Andric // use the inliner functionality when we're done hacking.
9667c7aba6eSDimitry Andric F->replaceAllUsesWith(ClonedFunc);
9677c7aba6eSDimitry Andric }
9687c7aba6eSDimitry Andric
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)969044eb2f6SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
970044eb2f6SDimitry Andric Function *F, FunctionOutliningMultiRegionInfo *OI,
971e6d15924SDimitry Andric OptimizationRemarkEmitter &ORE,
972b60736ecSDimitry Andric function_ref<AssumptionCache *(Function &)> LookupAC,
973b60736ecSDimitry Andric function_ref<TargetTransformInfo &(Function &)> GetTTI)
974b60736ecSDimitry Andric : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
9751d5ae102SDimitry Andric ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
9767c7aba6eSDimitry Andric
977044eb2f6SDimitry Andric // Clone the function, so that we can hack away on it.
978044eb2f6SDimitry Andric ValueToValueMapTy VMap;
979044eb2f6SDimitry Andric ClonedFunc = CloneFunction(F, VMap);
980044eb2f6SDimitry Andric
981044eb2f6SDimitry Andric // Go through all Outline Candidate Regions and update all BasicBlock
982044eb2f6SDimitry Andric // information.
9837fa27ce4SDimitry Andric for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
984044eb2f6SDimitry Andric OI->ORI) {
985044eb2f6SDimitry Andric SmallVector<BasicBlock *, 8> Region;
986b60736ecSDimitry Andric for (BasicBlock *BB : RegionInfo.Region)
987044eb2f6SDimitry Andric Region.push_back(cast<BasicBlock>(VMap[BB]));
988b60736ecSDimitry Andric
989044eb2f6SDimitry Andric BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
990044eb2f6SDimitry Andric BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
991044eb2f6SDimitry Andric BasicBlock *NewReturnBlock = nullptr;
992044eb2f6SDimitry Andric if (RegionInfo.ReturnBlock)
993044eb2f6SDimitry Andric NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
994044eb2f6SDimitry Andric FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
995044eb2f6SDimitry Andric Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
996044eb2f6SDimitry Andric ClonedOMRI->ORI.push_back(MappedRegionInfo);
997044eb2f6SDimitry Andric }
998044eb2f6SDimitry Andric // Go ahead and update all uses to the duplicate, so that we can just
999044eb2f6SDimitry Andric // use the inliner functionality when we're done hacking.
1000044eb2f6SDimitry Andric F->replaceAllUsesWith(ClonedFunc);
1001044eb2f6SDimitry Andric }
1002044eb2f6SDimitry Andric
normalizeReturnBlock() const1003b60736ecSDimitry Andric void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1004b60736ecSDimitry Andric auto GetFirstPHI = [](BasicBlock *BB) {
10057c7aba6eSDimitry Andric BasicBlock::iterator I = BB->begin();
10067c7aba6eSDimitry Andric PHINode *FirstPhi = nullptr;
10077c7aba6eSDimitry Andric while (I != BB->end()) {
10087c7aba6eSDimitry Andric PHINode *Phi = dyn_cast<PHINode>(I);
10097c7aba6eSDimitry Andric if (!Phi)
10107c7aba6eSDimitry Andric break;
10117c7aba6eSDimitry Andric if (!FirstPhi) {
10127c7aba6eSDimitry Andric FirstPhi = Phi;
10137c7aba6eSDimitry Andric break;
10147c7aba6eSDimitry Andric }
10157c7aba6eSDimitry Andric }
10167c7aba6eSDimitry Andric return FirstPhi;
10177c7aba6eSDimitry Andric };
10187c7aba6eSDimitry Andric
1019044eb2f6SDimitry Andric // Shouldn't need to normalize PHIs if we're not outlining non-early return
1020044eb2f6SDimitry Andric // blocks.
1021044eb2f6SDimitry Andric if (!ClonedOI)
1022044eb2f6SDimitry Andric return;
1023044eb2f6SDimitry Andric
10247c7aba6eSDimitry Andric // Special hackery is needed with PHI nodes that have inputs from more than
10257c7aba6eSDimitry Andric // one extracted block. For simplicity, just split the PHIs into a two-level
10267c7aba6eSDimitry Andric // sequence of PHIs, some of which will go in the extracted region, and some
10277c7aba6eSDimitry Andric // of which will go outside.
10287c7aba6eSDimitry Andric BasicBlock *PreReturn = ClonedOI->ReturnBlock;
10297c7aba6eSDimitry Andric // only split block when necessary:
1030b60736ecSDimitry Andric PHINode *FirstPhi = GetFirstPHI(PreReturn);
10317c7aba6eSDimitry Andric unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
10327c7aba6eSDimitry Andric
10337c7aba6eSDimitry Andric if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
10347c7aba6eSDimitry Andric return;
10357c7aba6eSDimitry Andric
10367c7aba6eSDimitry Andric auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1037e3b55780SDimitry Andric if (llvm::all_equal(PN->incoming_values()))
1038e3b55780SDimitry Andric return PN->getIncomingValue(0);
10397c7aba6eSDimitry Andric return nullptr;
10407c7aba6eSDimitry Andric };
10417c7aba6eSDimitry Andric
10427c7aba6eSDimitry Andric ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
10437c7aba6eSDimitry Andric ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
10447c7aba6eSDimitry Andric BasicBlock::iterator I = PreReturn->begin();
1045b1c73532SDimitry Andric BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
10467c7aba6eSDimitry Andric SmallVector<Instruction *, 4> DeadPhis;
10477c7aba6eSDimitry Andric while (I != PreReturn->end()) {
10487c7aba6eSDimitry Andric PHINode *OldPhi = dyn_cast<PHINode>(I);
10497c7aba6eSDimitry Andric if (!OldPhi)
10507c7aba6eSDimitry Andric break;
10517c7aba6eSDimitry Andric
10527c7aba6eSDimitry Andric PHINode *RetPhi =
1053b1c73532SDimitry Andric PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
1054b1c73532SDimitry Andric RetPhi->insertBefore(Ins);
10557c7aba6eSDimitry Andric OldPhi->replaceAllUsesWith(RetPhi);
1056b1c73532SDimitry Andric Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
10577c7aba6eSDimitry Andric
10587c7aba6eSDimitry Andric RetPhi->addIncoming(&*I, PreReturn);
10597c7aba6eSDimitry Andric for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
10607c7aba6eSDimitry Andric RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
10617c7aba6eSDimitry Andric OldPhi->removeIncomingValue(E);
10627c7aba6eSDimitry Andric }
10637c7aba6eSDimitry Andric
10647c7aba6eSDimitry Andric // After incoming values splitting, the old phi may become trivial.
10657c7aba6eSDimitry Andric // Keeping the trivial phi can introduce definition inside the outline
10667c7aba6eSDimitry Andric // region which is live-out, causing necessary overhead (load, store
10677c7aba6eSDimitry Andric // arg passing etc).
10687c7aba6eSDimitry Andric if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
10697c7aba6eSDimitry Andric OldPhi->replaceAllUsesWith(OldPhiVal);
10707c7aba6eSDimitry Andric DeadPhis.push_back(OldPhi);
10717c7aba6eSDimitry Andric }
10727c7aba6eSDimitry Andric ++I;
10737c7aba6eSDimitry Andric }
10747c7aba6eSDimitry Andric for (auto *DP : DeadPhis)
10757c7aba6eSDimitry Andric DP->eraseFromParent();
10767c7aba6eSDimitry Andric
1077b60736ecSDimitry Andric for (auto *E : ClonedOI->ReturnBlockPreds)
10787c7aba6eSDimitry Andric E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
10797c7aba6eSDimitry Andric }
10807c7aba6eSDimitry Andric
doMultiRegionFunctionOutlining()1081044eb2f6SDimitry Andric bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1082044eb2f6SDimitry Andric
1083344a3780SDimitry Andric auto ComputeRegionCost =
1084344a3780SDimitry Andric [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1085344a3780SDimitry Andric InstructionCost Cost = 0;
1086044eb2f6SDimitry Andric for (BasicBlock* BB : Region)
1087b60736ecSDimitry Andric Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1088044eb2f6SDimitry Andric return Cost;
1089044eb2f6SDimitry Andric };
1090044eb2f6SDimitry Andric
1091044eb2f6SDimitry Andric assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1092044eb2f6SDimitry Andric
1093044eb2f6SDimitry Andric if (ClonedOMRI->ORI.empty())
1094044eb2f6SDimitry Andric return false;
1095044eb2f6SDimitry Andric
1096044eb2f6SDimitry Andric // The CodeExtractor needs a dominator tree.
1097044eb2f6SDimitry Andric DominatorTree DT;
1098044eb2f6SDimitry Andric DT.recalculate(*ClonedFunc);
1099044eb2f6SDimitry Andric
1100044eb2f6SDimitry Andric // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1101044eb2f6SDimitry Andric LoopInfo LI(DT);
1102044eb2f6SDimitry Andric BranchProbabilityInfo BPI(*ClonedFunc, LI);
1103044eb2f6SDimitry Andric ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1104044eb2f6SDimitry Andric
11051d5ae102SDimitry Andric // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
11061d5ae102SDimitry Andric CodeExtractorAnalysisCache CEAC(*ClonedFunc);
11071d5ae102SDimitry Andric
1108044eb2f6SDimitry Andric SetVector<Value *> Inputs, Outputs, Sinks;
1109044eb2f6SDimitry Andric for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1110044eb2f6SDimitry Andric ClonedOMRI->ORI) {
1111344a3780SDimitry Andric InstructionCost CurrentOutlinedRegionCost =
1112344a3780SDimitry Andric ComputeRegionCost(RegionInfo.Region);
1113044eb2f6SDimitry Andric
1114044eb2f6SDimitry Andric CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1115e6d15924SDimitry Andric ClonedFuncBFI.get(), &BPI,
1116e6d15924SDimitry Andric LookupAC(*RegionInfo.EntryBlock->getParent()),
1117e6d15924SDimitry Andric /* AllowVarargs */ false);
1118044eb2f6SDimitry Andric
1119044eb2f6SDimitry Andric CE.findInputsOutputs(Inputs, Outputs, Sinks);
1120044eb2f6SDimitry Andric
1121b60736ecSDimitry Andric LLVM_DEBUG({
1122044eb2f6SDimitry Andric dbgs() << "inputs: " << Inputs.size() << "\n";
1123044eb2f6SDimitry Andric dbgs() << "outputs: " << Outputs.size() << "\n";
1124044eb2f6SDimitry Andric for (Value *value : Inputs)
1125044eb2f6SDimitry Andric dbgs() << "value used in func: " << *value << "\n";
1126044eb2f6SDimitry Andric for (Value *output : Outputs)
1127044eb2f6SDimitry Andric dbgs() << "instr used in func: " << *output << "\n";
1128b60736ecSDimitry Andric });
1129b60736ecSDimitry Andric
1130044eb2f6SDimitry Andric // Do not extract regions that have live exit variables.
1131044eb2f6SDimitry Andric if (Outputs.size() > 0 && !ForceLiveExit)
1132044eb2f6SDimitry Andric continue;
1133044eb2f6SDimitry Andric
1134b60736ecSDimitry Andric if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1135b60736ecSDimitry Andric CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1136cfca06d7SDimitry Andric BasicBlock *OutliningCallBB = OCS->getParent();
1137044eb2f6SDimitry Andric assert(OutliningCallBB->getParent() == ClonedFunc);
1138044eb2f6SDimitry Andric OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1139044eb2f6SDimitry Andric NumColdRegionsOutlined++;
1140044eb2f6SDimitry Andric OutlinedRegionCost += CurrentOutlinedRegionCost;
1141044eb2f6SDimitry Andric
1142044eb2f6SDimitry Andric if (MarkOutlinedColdCC) {
1143044eb2f6SDimitry Andric OutlinedFunc->setCallingConv(CallingConv::Cold);
1144cfca06d7SDimitry Andric OCS->setCallingConv(CallingConv::Cold);
1145044eb2f6SDimitry Andric }
1146044eb2f6SDimitry Andric } else
1147044eb2f6SDimitry Andric ORE.emit([&]() {
1148044eb2f6SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1149044eb2f6SDimitry Andric &RegionInfo.Region.front()->front())
1150044eb2f6SDimitry Andric << "Failed to extract region at block "
1151044eb2f6SDimitry Andric << ore::NV("Block", RegionInfo.Region.front());
1152044eb2f6SDimitry Andric });
1153044eb2f6SDimitry Andric }
1154044eb2f6SDimitry Andric
1155044eb2f6SDimitry Andric return !OutlinedFunctions.empty();
1156044eb2f6SDimitry Andric }
1157044eb2f6SDimitry Andric
1158044eb2f6SDimitry Andric Function *
doSingleRegionFunctionOutlining()1159044eb2f6SDimitry Andric PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
11607c7aba6eSDimitry Andric // Returns true if the block is to be partial inlined into the caller
11617c7aba6eSDimitry Andric // (i.e. not to be extracted to the out of line function)
11627c7aba6eSDimitry Andric auto ToBeInlined = [&, this](BasicBlock *BB) {
11637c7aba6eSDimitry Andric return BB == ClonedOI->ReturnBlock ||
1164b60736ecSDimitry Andric llvm::is_contained(ClonedOI->Entries, BB);
11657c7aba6eSDimitry Andric };
11667c7aba6eSDimitry Andric
1167044eb2f6SDimitry Andric assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1168044eb2f6SDimitry Andric // The CodeExtractor needs a dominator tree.
1169044eb2f6SDimitry Andric DominatorTree DT;
1170044eb2f6SDimitry Andric DT.recalculate(*ClonedFunc);
1171044eb2f6SDimitry Andric
1172044eb2f6SDimitry Andric // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1173044eb2f6SDimitry Andric LoopInfo LI(DT);
1174044eb2f6SDimitry Andric BranchProbabilityInfo BPI(*ClonedFunc, LI);
1175044eb2f6SDimitry Andric ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1176044eb2f6SDimitry Andric
11777c7aba6eSDimitry Andric // Gather up the blocks that we're going to extract.
11787c7aba6eSDimitry Andric std::vector<BasicBlock *> ToExtract;
1179b60736ecSDimitry Andric auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
11807c7aba6eSDimitry Andric ToExtract.push_back(ClonedOI->NonReturnBlock);
1181b60736ecSDimitry Andric OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182b60736ecSDimitry Andric ClonedOI->NonReturnBlock, ClonedFuncTTI);
11837fa27ce4SDimitry Andric for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
11847fa27ce4SDimitry Andric if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
11857fa27ce4SDimitry Andric ToExtract.push_back(BB);
11867c7aba6eSDimitry Andric // FIXME: the code extractor may hoist/sink more code
11877c7aba6eSDimitry Andric // into the outlined function which may make the outlining
11887c7aba6eSDimitry Andric // overhead (the difference of the outlined function cost
11897c7aba6eSDimitry Andric // and OutliningRegionCost) look larger.
11907fa27ce4SDimitry Andric OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
11917c7aba6eSDimitry Andric }
11927c7aba6eSDimitry Andric
11937c7aba6eSDimitry Andric // Extract the body of the if.
11941d5ae102SDimitry Andric CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1195044eb2f6SDimitry Andric Function *OutlinedFunc =
1196044eb2f6SDimitry Andric CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1197e6d15924SDimitry Andric ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1198044eb2f6SDimitry Andric /* AllowVarargs */ true)
11991d5ae102SDimitry Andric .extractCodeRegion(CEAC);
12007c7aba6eSDimitry Andric
12017c7aba6eSDimitry Andric if (OutlinedFunc) {
1202044eb2f6SDimitry Andric BasicBlock *OutliningCallBB =
1203b60736ecSDimitry Andric PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
12047c7aba6eSDimitry Andric assert(OutliningCallBB->getParent() == ClonedFunc);
1205044eb2f6SDimitry Andric OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1206044eb2f6SDimitry Andric } else
1207044eb2f6SDimitry Andric ORE.emit([&]() {
1208044eb2f6SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1209044eb2f6SDimitry Andric &ToExtract.front()->front())
1210044eb2f6SDimitry Andric << "Failed to extract region at block "
1211044eb2f6SDimitry Andric << ore::NV("Block", ToExtract.front());
1212044eb2f6SDimitry Andric });
12137c7aba6eSDimitry Andric
12147c7aba6eSDimitry Andric return OutlinedFunc;
12157c7aba6eSDimitry Andric }
12167c7aba6eSDimitry Andric
~FunctionCloner()12177c7aba6eSDimitry Andric PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
12187c7aba6eSDimitry Andric // Ditch the duplicate, since we're done with it, and rewrite all remaining
12197c7aba6eSDimitry Andric // users (function pointers, etc.) back to the original function.
12207c7aba6eSDimitry Andric ClonedFunc->replaceAllUsesWith(OrigFunc);
12217c7aba6eSDimitry Andric ClonedFunc->eraseFromParent();
12227c7aba6eSDimitry Andric if (!IsFunctionInlined) {
1223044eb2f6SDimitry Andric // Remove each function that was speculatively created if there is no
12247c7aba6eSDimitry Andric // reference.
1225044eb2f6SDimitry Andric for (auto FuncBBPair : OutlinedFunctions) {
1226044eb2f6SDimitry Andric Function *Func = FuncBBPair.first;
1227044eb2f6SDimitry Andric Func->eraseFromParent();
1228044eb2f6SDimitry Andric }
12297c7aba6eSDimitry Andric }
12307c7aba6eSDimitry Andric }
12317c7aba6eSDimitry Andric
unswitchFunction(Function & F)1232b60736ecSDimitry Andric std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1233b60736ecSDimitry Andric if (F.hasAddressTaken())
1234044eb2f6SDimitry Andric return {false, nullptr};
1235a303c417SDimitry Andric
1236148779dfSDimitry Andric // Let inliner handle it
1237b60736ecSDimitry Andric if (F.hasFnAttribute(Attribute::AlwaysInline))
1238044eb2f6SDimitry Andric return {false, nullptr};
1239148779dfSDimitry Andric
1240b60736ecSDimitry Andric if (F.hasFnAttribute(Attribute::NoInline))
1241044eb2f6SDimitry Andric return {false, nullptr};
1242148779dfSDimitry Andric
1243b60736ecSDimitry Andric if (PSI.isFunctionEntryCold(&F))
1244044eb2f6SDimitry Andric return {false, nullptr};
1245148779dfSDimitry Andric
1246b60736ecSDimitry Andric if (F.users().empty())
1247044eb2f6SDimitry Andric return {false, nullptr};
1248a303c417SDimitry Andric
1249b60736ecSDimitry Andric OptimizationRemarkEmitter ORE(&F);
1250044eb2f6SDimitry Andric
1251044eb2f6SDimitry Andric // Only try to outline cold regions if we have a profile summary, which
1252044eb2f6SDimitry Andric // implies we have profiling information.
1253b60736ecSDimitry Andric if (PSI.hasProfileSummary() && F.hasProfileData() &&
1254044eb2f6SDimitry Andric !DisableMultiRegionPartialInline) {
1255044eb2f6SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1256eb11fae6SDimitry Andric computeOutliningColdRegionsInfo(F, ORE);
1257044eb2f6SDimitry Andric if (OMRI) {
1258b60736ecSDimitry Andric FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1259044eb2f6SDimitry Andric
1260b60736ecSDimitry Andric LLVM_DEBUG({
1261cfca06d7SDimitry Andric dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1262cfca06d7SDimitry Andric dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1263044eb2f6SDimitry Andric << "\n";
1264b60736ecSDimitry Andric });
1265b60736ecSDimitry Andric
1266044eb2f6SDimitry Andric bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1267044eb2f6SDimitry Andric
1268044eb2f6SDimitry Andric if (DidOutline) {
1269b60736ecSDimitry Andric LLVM_DEBUG({
1270044eb2f6SDimitry Andric dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1271044eb2f6SDimitry Andric Cloner.ClonedFunc->print(dbgs());
1272044eb2f6SDimitry Andric dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1273b60736ecSDimitry Andric });
1274044eb2f6SDimitry Andric
1275044eb2f6SDimitry Andric if (tryPartialInline(Cloner))
1276044eb2f6SDimitry Andric return {true, nullptr};
1277044eb2f6SDimitry Andric }
1278044eb2f6SDimitry Andric }
1279044eb2f6SDimitry Andric }
1280044eb2f6SDimitry Andric
1281044eb2f6SDimitry Andric // Fall-thru to regular partial inlining if we:
1282044eb2f6SDimitry Andric // i) can't find any cold regions to outline, or
1283044eb2f6SDimitry Andric // ii) can't inline the outlined function anywhere.
12846b3f41edSDimitry Andric std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
12856b3f41edSDimitry Andric if (!OI)
1286044eb2f6SDimitry Andric return {false, nullptr};
1287600c6fa1SEd Schouten
1288b60736ecSDimitry Andric FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289b60736ecSDimitry Andric Cloner.normalizeReturnBlock();
1290044eb2f6SDimitry Andric
1291044eb2f6SDimitry Andric Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1292044eb2f6SDimitry Andric
1293044eb2f6SDimitry Andric if (!OutlinedFunction)
1294044eb2f6SDimitry Andric return {false, nullptr};
1295600c6fa1SEd Schouten
1296b60736ecSDimitry Andric if (tryPartialInline(Cloner))
1297044eb2f6SDimitry Andric return {true, OutlinedFunction};
1298b2f21fb0SEd Schouten
1299044eb2f6SDimitry Andric return {false, nullptr};
13006b3f41edSDimitry Andric }
13016b3f41edSDimitry Andric
tryPartialInline(FunctionCloner & Cloner)13027c7aba6eSDimitry Andric bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1303044eb2f6SDimitry Andric if (Cloner.OutlinedFunctions.empty())
13047c7aba6eSDimitry Andric return false;
13056b3f41edSDimitry Andric
1306344a3780SDimitry Andric auto OutliningCosts = computeOutliningCosts(Cloner);
1307344a3780SDimitry Andric
1308e3b55780SDimitry Andric InstructionCost SizeCost = std::get<0>(OutliningCosts);
1309e3b55780SDimitry Andric InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1310e3b55780SDimitry Andric
1311e3b55780SDimitry Andric assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1312e3b55780SDimitry Andric "Expected valid costs");
13137c7aba6eSDimitry Andric
1314044eb2f6SDimitry Andric // Only calculate RelativeToEntryFreq when we are doing single region
1315044eb2f6SDimitry Andric // outlining.
1316044eb2f6SDimitry Andric BranchProbability RelativeToEntryFreq;
1317b60736ecSDimitry Andric if (Cloner.ClonedOI)
1318044eb2f6SDimitry Andric RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1319b60736ecSDimitry Andric else
1320044eb2f6SDimitry Andric // RelativeToEntryFreq doesn't make sense when we have more than one
1321044eb2f6SDimitry Andric // outlined call because each call will have a different relative frequency
1322044eb2f6SDimitry Andric // to the entry block. We can consider using the average, but the
1323044eb2f6SDimitry Andric // usefulness of that information is questionable. For now, assume we never
1324044eb2f6SDimitry Andric // execute the calls to outlined functions.
1325044eb2f6SDimitry Andric RelativeToEntryFreq = BranchProbability(0, 1);
13266b3f41edSDimitry Andric
1327e3b55780SDimitry Andric BlockFrequency WeightedRcost =
1328e3b55780SDimitry Andric BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1329044eb2f6SDimitry Andric
1330044eb2f6SDimitry Andric // The call sequence(s) to the outlined function(s) are larger than the sum of
1331044eb2f6SDimitry Andric // the original outlined region size(s), it does not increase the chances of
1332044eb2f6SDimitry Andric // inlining the function with outlining (The inliner uses the size increase to
13337c7aba6eSDimitry Andric // model the cost of inlining a callee).
13347c7aba6eSDimitry Andric if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1335eb11fae6SDimitry Andric OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
13366b3f41edSDimitry Andric DebugLoc DLoc;
13376b3f41edSDimitry Andric BasicBlock *Block;
1338b60736ecSDimitry Andric std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1339eb11fae6SDimitry Andric OrigFuncORE.emit([&]() {
1340044eb2f6SDimitry Andric return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
13416b3f41edSDimitry Andric DLoc, Block)
13427c7aba6eSDimitry Andric << ore::NV("Function", Cloner.OrigFunc)
13436b3f41edSDimitry Andric << " not partially inlined into callers (Original Size = "
13447c7aba6eSDimitry Andric << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
13456b3f41edSDimitry Andric << ", Size of call sequence to outlined function = "
1346044eb2f6SDimitry Andric << ore::NV("NewSize", SizeCost) << ")";
1347044eb2f6SDimitry Andric });
13486b3f41edSDimitry Andric return false;
13496b3f41edSDimitry Andric }
13506b3f41edSDimitry Andric
13511d5ae102SDimitry Andric assert(Cloner.OrigFunc->users().empty() &&
13526b3f41edSDimitry Andric "F's users should all be replaced!");
13537c7aba6eSDimitry Andric
13547c7aba6eSDimitry Andric std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
13557c7aba6eSDimitry Andric Cloner.ClonedFunc->user_end());
13566b3f41edSDimitry Andric
13576b3f41edSDimitry Andric DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1358c7dac04cSDimitry Andric auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1359c7dac04cSDimitry Andric if (CalleeEntryCount)
13607c7aba6eSDimitry Andric computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
13616b3f41edSDimitry Andric
1362eb11fae6SDimitry Andric uint64_t CalleeEntryCountV =
1363c0981da4SDimitry Andric (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
13647c7aba6eSDimitry Andric
13656b3f41edSDimitry Andric bool AnyInline = false;
13666b3f41edSDimitry Andric for (User *User : Users) {
13676f8fc217SDimitry Andric // Don't bother with BlockAddress used by CallBr for asm goto.
13686f8fc217SDimitry Andric if (isa<BlockAddress>(User))
13696f8fc217SDimitry Andric continue;
13706f8fc217SDimitry Andric
1371cfca06d7SDimitry Andric CallBase *CB = getSupportedCallBase(User);
13726b3f41edSDimitry Andric
1373b60736ecSDimitry Andric if (isLimitReached())
13746b3f41edSDimitry Andric continue;
13756b3f41edSDimitry Andric
1376cfca06d7SDimitry Andric OptimizationRemarkEmitter CallerORE(CB->getCaller());
1377cfca06d7SDimitry Andric if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
13786b3f41edSDimitry Andric continue;
13796b3f41edSDimitry Andric
1380044eb2f6SDimitry Andric // Construct remark before doing the inlining, as after successful inlining
1381044eb2f6SDimitry Andric // the callsite is removed.
1382cfca06d7SDimitry Andric OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1383044eb2f6SDimitry Andric OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1384cfca06d7SDimitry Andric << ore::NV("Caller", CB->getCaller());
13856b3f41edSDimitry Andric
13867fa27ce4SDimitry Andric InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1387044eb2f6SDimitry Andric // We can only forward varargs when we outlined a single region, else we
1388044eb2f6SDimitry Andric // bail on vararg functions.
1389e3b55780SDimitry Andric if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1390044eb2f6SDimitry Andric (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1391cfca06d7SDimitry Andric : nullptr))
1392cfca06d7SDimitry Andric .isSuccess())
1393044eb2f6SDimitry Andric continue;
1394044eb2f6SDimitry Andric
1395eb11fae6SDimitry Andric CallerORE.emit(OR);
13966b3f41edSDimitry Andric
13976b3f41edSDimitry Andric // Now update the entry count:
13986b3f41edSDimitry Andric if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
13996b3f41edSDimitry Andric uint64_t CallSiteCount = CallSiteToProfCountMap[User];
14006b3f41edSDimitry Andric CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
14016b3f41edSDimitry Andric }
14026b3f41edSDimitry Andric
14036b3f41edSDimitry Andric AnyInline = true;
14046b3f41edSDimitry Andric NumPartialInlining++;
14056b3f41edSDimitry Andric // Update the stats
1406044eb2f6SDimitry Andric if (Cloner.ClonedOI)
14076b3f41edSDimitry Andric NumPartialInlined++;
1408044eb2f6SDimitry Andric else
1409044eb2f6SDimitry Andric NumColdOutlinePartialInlined++;
14106b3f41edSDimitry Andric }
14116b3f41edSDimitry Andric
14127c7aba6eSDimitry Andric if (AnyInline) {
14137c7aba6eSDimitry Andric Cloner.IsFunctionInlined = true;
14147c7aba6eSDimitry Andric if (CalleeEntryCount)
1415c0981da4SDimitry Andric Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1416c0981da4SDimitry Andric CalleeEntryCountV, CalleeEntryCount->getType()));
1417eb11fae6SDimitry Andric OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1418eb11fae6SDimitry Andric OrigFuncORE.emit([&]() {
1419044eb2f6SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1420044eb2f6SDimitry Andric << "Partially inlined into at least one caller";
1421044eb2f6SDimitry Andric });
14227c7aba6eSDimitry Andric }
14236b3f41edSDimitry Andric
14246b3f41edSDimitry Andric return AnyInline;
1425600c6fa1SEd Schouten }
1426600c6fa1SEd Schouten
run(Module & M)1427b915e9e0SDimitry Andric bool PartialInlinerImpl::run(Module &M) {
142812f3ca4cSDimitry Andric if (DisablePartialInlining)
142912f3ca4cSDimitry Andric return false;
143012f3ca4cSDimitry Andric
1431b915e9e0SDimitry Andric std::vector<Function *> Worklist;
1432b915e9e0SDimitry Andric Worklist.reserve(M.size());
143301095a5dSDimitry Andric for (Function &F : M)
143401095a5dSDimitry Andric if (!F.use_empty() && !F.isDeclaration())
1435b915e9e0SDimitry Andric Worklist.push_back(&F);
1436600c6fa1SEd Schouten
1437b915e9e0SDimitry Andric bool Changed = false;
1438b915e9e0SDimitry Andric while (!Worklist.empty()) {
1439b915e9e0SDimitry Andric Function *CurrFunc = Worklist.back();
1440b915e9e0SDimitry Andric Worklist.pop_back();
1441600c6fa1SEd Schouten
1442b915e9e0SDimitry Andric if (CurrFunc->use_empty())
1443b915e9e0SDimitry Andric continue;
1444600c6fa1SEd Schouten
1445b60736ecSDimitry Andric std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1446044eb2f6SDimitry Andric if (Result.second)
1447044eb2f6SDimitry Andric Worklist.push_back(Result.second);
1448d8e91e46SDimitry Andric Changed |= Result.first;
1449600c6fa1SEd Schouten }
1450600c6fa1SEd Schouten
1451b915e9e0SDimitry Andric return Changed;
1452600c6fa1SEd Schouten }
1453600c6fa1SEd Schouten
run(Module & M,ModuleAnalysisManager & AM)1454b915e9e0SDimitry Andric PreservedAnalyses PartialInlinerPass::run(Module &M,
1455b915e9e0SDimitry Andric ModuleAnalysisManager &AM) {
1456b915e9e0SDimitry Andric auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1457a303c417SDimitry Andric
1458cfca06d7SDimitry Andric auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1459b915e9e0SDimitry Andric return FAM.getResult<AssumptionAnalysis>(F);
1460b915e9e0SDimitry Andric };
1461a303c417SDimitry Andric
1462e6d15924SDimitry Andric auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1463e6d15924SDimitry Andric return FAM.getCachedResult<AssumptionAnalysis>(F);
1464e6d15924SDimitry Andric };
1465e6d15924SDimitry Andric
1466cfca06d7SDimitry Andric auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1467a303c417SDimitry Andric return FAM.getResult<BlockFrequencyAnalysis>(F);
1468a303c417SDimitry Andric };
1469a303c417SDimitry Andric
1470cfca06d7SDimitry Andric auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1471a303c417SDimitry Andric return FAM.getResult<TargetIRAnalysis>(F);
1472a303c417SDimitry Andric };
1473a303c417SDimitry Andric
1474cfca06d7SDimitry Andric auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1475cfca06d7SDimitry Andric return FAM.getResult<TargetLibraryAnalysis>(F);
1476cfca06d7SDimitry Andric };
1477a303c417SDimitry Andric
1478cfca06d7SDimitry Andric ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1479cfca06d7SDimitry Andric
1480cfca06d7SDimitry Andric if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1481cfca06d7SDimitry Andric GetTLI, PSI, GetBFI)
1482044eb2f6SDimitry Andric .run(M))
148301095a5dSDimitry Andric return PreservedAnalyses::none();
148401095a5dSDimitry Andric return PreservedAnalyses::all();
1485600c6fa1SEd Schouten }
1486