xref: /src/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1600c6fa1SEd Schouten //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2600c6fa1SEd Schouten //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6600c6fa1SEd Schouten //
7600c6fa1SEd Schouten //===----------------------------------------------------------------------===//
8600c6fa1SEd Schouten //
9600c6fa1SEd Schouten // This pass performs partial inlining, typically by inlining an if statement
10600c6fa1SEd Schouten // that surrounds the body of the function.
11600c6fa1SEd Schouten //
12600c6fa1SEd Schouten //===----------------------------------------------------------------------===//
13600c6fa1SEd Schouten 
1401095a5dSDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h"
15044eb2f6SDimitry Andric #include "llvm/ADT/DenseMap.h"
16044eb2f6SDimitry Andric #include "llvm/ADT/DenseSet.h"
177fa27ce4SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
18044eb2f6SDimitry Andric #include "llvm/ADT/STLExtras.h"
19044eb2f6SDimitry Andric #include "llvm/ADT/SmallVector.h"
204a16efa3SDimitry Andric #include "llvm/ADT/Statistic.h"
21b915e9e0SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
22b915e9e0SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
23a303c417SDimitry Andric #include "llvm/Analysis/InlineCost.h"
24b915e9e0SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
25044eb2f6SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
26a303c417SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
27a303c417SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
28a303c417SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
29044eb2f6SDimitry Andric #include "llvm/IR/Attributes.h"
30044eb2f6SDimitry Andric #include "llvm/IR/BasicBlock.h"
315ca98fd9SDimitry Andric #include "llvm/IR/CFG.h"
32044eb2f6SDimitry Andric #include "llvm/IR/DebugLoc.h"
3312f3ca4cSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
345ca98fd9SDimitry Andric #include "llvm/IR/Dominators.h"
35044eb2f6SDimitry Andric #include "llvm/IR/Function.h"
36044eb2f6SDimitry Andric #include "llvm/IR/InstrTypes.h"
37044eb2f6SDimitry Andric #include "llvm/IR/Instruction.h"
384a16efa3SDimitry Andric #include "llvm/IR/Instructions.h"
39044eb2f6SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
40044eb2f6SDimitry Andric #include "llvm/IR/Intrinsics.h"
414a16efa3SDimitry Andric #include "llvm/IR/Module.h"
42145449b1SDimitry Andric #include "llvm/IR/Operator.h"
43e3b55780SDimitry Andric #include "llvm/IR/ProfDataUtils.h"
44044eb2f6SDimitry Andric #include "llvm/IR/User.h"
45044eb2f6SDimitry Andric #include "llvm/Support/BlockFrequency.h"
46044eb2f6SDimitry Andric #include "llvm/Support/BranchProbability.h"
47044eb2f6SDimitry Andric #include "llvm/Support/Casting.h"
48044eb2f6SDimitry Andric #include "llvm/Support/CommandLine.h"
49044eb2f6SDimitry Andric #include "llvm/Support/ErrorHandling.h"
5001095a5dSDimitry Andric #include "llvm/Transforms/IPO.h"
51600c6fa1SEd Schouten #include "llvm/Transforms/Utils/Cloning.h"
5258b69754SDimitry Andric #include "llvm/Transforms/Utils/CodeExtractor.h"
53044eb2f6SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
54044eb2f6SDimitry Andric #include <algorithm>
55044eb2f6SDimitry Andric #include <cassert>
56044eb2f6SDimitry Andric #include <cstdint>
57044eb2f6SDimitry Andric #include <memory>
58044eb2f6SDimitry Andric #include <tuple>
59044eb2f6SDimitry Andric #include <vector>
60044eb2f6SDimitry Andric 
61600c6fa1SEd Schouten using namespace llvm;
62600c6fa1SEd Schouten 
6312f3ca4cSDimitry Andric #define DEBUG_TYPE "partial-inlining"
645ca98fd9SDimitry Andric 
65a303c417SDimitry Andric STATISTIC(NumPartialInlined,
66a303c417SDimitry Andric           "Number of callsites functions partially inlined into.");
67044eb2f6SDimitry Andric STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
68044eb2f6SDimitry Andric                                         "cold outlined regions were partially "
69044eb2f6SDimitry Andric                                         "inlined into its caller(s).");
70044eb2f6SDimitry Andric STATISTIC(NumColdRegionsFound,
71044eb2f6SDimitry Andric            "Number of cold single entry/exit regions found.");
72044eb2f6SDimitry Andric STATISTIC(NumColdRegionsOutlined,
73044eb2f6SDimitry Andric            "Number of cold single entry/exit regions outlined.");
74b2f21fb0SEd Schouten 
7512f3ca4cSDimitry Andric // Command line option to disable partial-inlining. The default is false:
7612f3ca4cSDimitry Andric static cl::opt<bool>
7712f3ca4cSDimitry Andric     DisablePartialInlining("disable-partial-inlining", cl::init(false),
78044eb2f6SDimitry Andric                            cl::Hidden, cl::desc("Disable partial inlining"));
79044eb2f6SDimitry Andric // Command line option to disable multi-region partial-inlining. The default is
80044eb2f6SDimitry Andric // false:
81044eb2f6SDimitry Andric static cl::opt<bool> DisableMultiRegionPartialInline(
82044eb2f6SDimitry Andric     "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
83044eb2f6SDimitry Andric     cl::desc("Disable multi-region partial inlining"));
84044eb2f6SDimitry Andric 
85044eb2f6SDimitry Andric // Command line option to force outlining in regions with live exit variables.
86044eb2f6SDimitry Andric // The default is false:
87044eb2f6SDimitry Andric static cl::opt<bool>
88044eb2f6SDimitry Andric     ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
89044eb2f6SDimitry Andric                cl::desc("Force outline regions with live exits"));
90044eb2f6SDimitry Andric 
91044eb2f6SDimitry Andric // Command line option to enable marking outline functions with Cold Calling
92044eb2f6SDimitry Andric // Convention. The default is false:
93044eb2f6SDimitry Andric static cl::opt<bool>
94044eb2f6SDimitry Andric     MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
95044eb2f6SDimitry Andric                        cl::desc("Mark outline function calls with ColdCC"));
96044eb2f6SDimitry Andric 
976b3f41edSDimitry Andric // This is an option used by testing:
986b3f41edSDimitry Andric static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
99145449b1SDimitry Andric 
1006b3f41edSDimitry Andric                                       cl::ReallyHidden,
1016b3f41edSDimitry Andric                                       cl::desc("Skip Cost Analysis"));
102044eb2f6SDimitry Andric // Used to determine if a cold region is worth outlining based on
103044eb2f6SDimitry Andric // its inlining cost compared to the original function.  Default is set at 10%.
104044eb2f6SDimitry Andric // ie. if the cold region reduces the inlining cost of the original function by
105044eb2f6SDimitry Andric // at least 10%.
106044eb2f6SDimitry Andric static cl::opt<float> MinRegionSizeRatio(
107044eb2f6SDimitry Andric     "min-region-size-ratio", cl::init(0.1), cl::Hidden,
108044eb2f6SDimitry Andric     cl::desc("Minimum ratio comparing relative sizes of each "
109044eb2f6SDimitry Andric              "outline candidate and original function"));
110044eb2f6SDimitry Andric // Used to tune the minimum number of execution counts needed in the predecessor
111044eb2f6SDimitry Andric // block to the cold edge. ie. confidence interval.
112044eb2f6SDimitry Andric static cl::opt<unsigned>
113044eb2f6SDimitry Andric     MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114044eb2f6SDimitry Andric                              cl::desc("Minimum block executions to consider "
115044eb2f6SDimitry Andric                                       "its BranchProbabilityInfo valid"));
116044eb2f6SDimitry Andric // Used to determine when an edge is considered cold. Default is set to 10%. ie.
117044eb2f6SDimitry Andric // if the branch probability is 10% or less, then it is deemed as 'cold'.
118044eb2f6SDimitry Andric static cl::opt<float> ColdBranchRatio(
119044eb2f6SDimitry Andric     "cold-branch-ratio", cl::init(0.1), cl::Hidden,
120044eb2f6SDimitry Andric     cl::desc("Minimum BranchProbability to consider a region cold."));
12112f3ca4cSDimitry Andric 
122a303c417SDimitry Andric static cl::opt<unsigned> MaxNumInlineBlocks(
123a303c417SDimitry Andric     "max-num-inline-blocks", cl::init(5), cl::Hidden,
124044eb2f6SDimitry Andric     cl::desc("Max number of blocks to be partially inlined"));
125a303c417SDimitry Andric 
12612f3ca4cSDimitry Andric // Command line option to set the maximum number of partial inlining allowed
12712f3ca4cSDimitry Andric // for the module. The default value of -1 means no limit.
12812f3ca4cSDimitry Andric static cl::opt<int> MaxNumPartialInlining(
129145449b1SDimitry Andric     "max-partial-inlining", cl::init(-1), cl::Hidden,
13012f3ca4cSDimitry Andric     cl::desc("Max number of partial inlining. The default is unlimited"));
13112f3ca4cSDimitry Andric 
1326b3f41edSDimitry Andric // Used only when PGO or user annotated branch data is absent. It is
1336b3f41edSDimitry Andric // the least value that is used to weigh the outline region. If BFI
1346b3f41edSDimitry Andric // produces larger value, the BFI value will be used.
1356b3f41edSDimitry Andric static cl::opt<int>
1366b3f41edSDimitry Andric     OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
137145449b1SDimitry Andric                              cl::Hidden,
1386b3f41edSDimitry Andric                              cl::desc("Relative frequency of outline region to "
1396b3f41edSDimitry Andric                                       "the entry block"));
1406b3f41edSDimitry Andric 
141d288ef4cSDimitry Andric static cl::opt<unsigned> ExtraOutliningPenalty(
142d288ef4cSDimitry Andric     "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
143d288ef4cSDimitry Andric     cl::desc("A debug option to add additional penalty to the computed one."));
144d288ef4cSDimitry Andric 
145600c6fa1SEd Schouten namespace {
146a303c417SDimitry Andric 
147a303c417SDimitry Andric struct FunctionOutliningInfo {
148044eb2f6SDimitry Andric   FunctionOutliningInfo() = default;
149044eb2f6SDimitry Andric 
150a303c417SDimitry Andric   // Returns the number of blocks to be inlined including all blocks
151a303c417SDimitry Andric   // in Entries and one return block.
getNumInlinedBlocks__anonb7b3c81f0111::FunctionOutliningInfo152b60736ecSDimitry Andric   unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
153a303c417SDimitry Andric 
154a303c417SDimitry Andric   // A set of blocks including the function entry that guard
155a303c417SDimitry Andric   // the region to be outlined.
156a303c417SDimitry Andric   SmallVector<BasicBlock *, 4> Entries;
157044eb2f6SDimitry Andric 
158a303c417SDimitry Andric   // The return block that is not included in the outlined region.
159044eb2f6SDimitry Andric   BasicBlock *ReturnBlock = nullptr;
160044eb2f6SDimitry Andric 
161d288ef4cSDimitry Andric   // The dominating block of the region to be outlined.
162044eb2f6SDimitry Andric   BasicBlock *NonReturnBlock = nullptr;
163044eb2f6SDimitry Andric 
164b1c73532SDimitry Andric   // The set of blocks in Entries that are predecessors to ReturnBlock
165a303c417SDimitry Andric   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
166a303c417SDimitry Andric };
167a303c417SDimitry Andric 
168044eb2f6SDimitry Andric struct FunctionOutliningMultiRegionInfo {
169145449b1SDimitry Andric   FunctionOutliningMultiRegionInfo() = default;
170044eb2f6SDimitry Andric 
171044eb2f6SDimitry Andric   // Container for outline regions
172044eb2f6SDimitry Andric   struct OutlineRegionInfo {
OutlineRegionInfo__anonb7b3c81f0111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo173e6d15924SDimitry Andric     OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
174044eb2f6SDimitry Andric                       BasicBlock *EntryBlock, BasicBlock *ExitBlock,
175044eb2f6SDimitry Andric                       BasicBlock *ReturnBlock)
176e6d15924SDimitry Andric         : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
177e6d15924SDimitry Andric           ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
178044eb2f6SDimitry Andric     SmallVector<BasicBlock *, 8> Region;
179044eb2f6SDimitry Andric     BasicBlock *EntryBlock;
180044eb2f6SDimitry Andric     BasicBlock *ExitBlock;
181044eb2f6SDimitry Andric     BasicBlock *ReturnBlock;
182044eb2f6SDimitry Andric   };
183044eb2f6SDimitry Andric 
184044eb2f6SDimitry Andric   SmallVector<OutlineRegionInfo, 4> ORI;
185044eb2f6SDimitry Andric };
186044eb2f6SDimitry Andric 
187b915e9e0SDimitry Andric struct PartialInlinerImpl {
188044eb2f6SDimitry Andric 
PartialInlinerImpl__anonb7b3c81f0111::PartialInlinerImpl189a303c417SDimitry Andric   PartialInlinerImpl(
190cfca06d7SDimitry Andric       function_ref<AssumptionCache &(Function &)> GetAC,
191e6d15924SDimitry Andric       function_ref<AssumptionCache *(Function &)> LookupAC,
192cfca06d7SDimitry Andric       function_ref<TargetTransformInfo &(Function &)> GTTI,
193cfca06d7SDimitry Andric       function_ref<const TargetLibraryInfo &(Function &)> GTLI,
194cfca06d7SDimitry Andric       ProfileSummaryInfo &ProfSI,
195cfca06d7SDimitry Andric       function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
196e6d15924SDimitry Andric       : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
197cfca06d7SDimitry Andric         GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
198044eb2f6SDimitry Andric 
199b915e9e0SDimitry Andric   bool run(Module &M);
200044eb2f6SDimitry Andric   // Main part of the transformation that calls helper functions to find
201044eb2f6SDimitry Andric   // outlining candidates, clone & outline the function, and attempt to
202044eb2f6SDimitry Andric   // partially inline the resulting function. Returns true if
203044eb2f6SDimitry Andric   // inlining was successful, false otherwise.  Also returns the outline
204044eb2f6SDimitry Andric   // function (only if we partially inlined early returns) as there is a
205044eb2f6SDimitry Andric   // possibility to further "peel" early return statements that were left in the
206044eb2f6SDimitry Andric   // outline function due to code size.
207b60736ecSDimitry Andric   std::pair<bool, Function *> unswitchFunction(Function &F);
208b915e9e0SDimitry Andric 
209eb11fae6SDimitry Andric   // This class speculatively clones the function to be partial inlined.
2107c7aba6eSDimitry Andric   // At the end of partial inlining, the remaining callsites to the cloned
2117c7aba6eSDimitry Andric   // function that are not partially inlined will be fixed up to reference
2127c7aba6eSDimitry Andric   // the original function, and the cloned function will be erased.
2137c7aba6eSDimitry Andric   struct FunctionCloner {
214044eb2f6SDimitry Andric     // Two constructors, one for single region outlining, the other for
215044eb2f6SDimitry Andric     // multi-region outlining.
216044eb2f6SDimitry Andric     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
217e6d15924SDimitry Andric                    OptimizationRemarkEmitter &ORE,
218b60736ecSDimitry Andric                    function_ref<AssumptionCache *(Function &)> LookupAC,
219b60736ecSDimitry Andric                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
220044eb2f6SDimitry Andric     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
221e6d15924SDimitry Andric                    OptimizationRemarkEmitter &ORE,
222b60736ecSDimitry Andric                    function_ref<AssumptionCache *(Function &)> LookupAC,
223b60736ecSDimitry Andric                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
224b60736ecSDimitry Andric 
2257c7aba6eSDimitry Andric     ~FunctionCloner();
2267c7aba6eSDimitry Andric 
2277c7aba6eSDimitry Andric     // Prepare for function outlining: making sure there is only
2287c7aba6eSDimitry Andric     // one incoming edge from the extracted/outlined region to
2297c7aba6eSDimitry Andric     // the return block.
230b60736ecSDimitry Andric     void normalizeReturnBlock() const;
2317c7aba6eSDimitry Andric 
232044eb2f6SDimitry Andric     // Do function outlining for cold regions.
233044eb2f6SDimitry Andric     bool doMultiRegionFunctionOutlining();
234044eb2f6SDimitry Andric     // Do function outlining for region after early return block(s).
235044eb2f6SDimitry Andric     // NOTE: For vararg functions that do the vararg handling in the outlined
236044eb2f6SDimitry Andric     //       function, we temporarily generate IR that does not properly
237044eb2f6SDimitry Andric     //       forward varargs to the outlined function. Calling InlineFunction
238044eb2f6SDimitry Andric     //       will update calls to the outlined functions to properly forward
239044eb2f6SDimitry Andric     //       the varargs.
240044eb2f6SDimitry Andric     Function *doSingleRegionFunctionOutlining();
2417c7aba6eSDimitry Andric 
2427c7aba6eSDimitry Andric     Function *OrigFunc = nullptr;
2437c7aba6eSDimitry Andric     Function *ClonedFunc = nullptr;
244044eb2f6SDimitry Andric 
245044eb2f6SDimitry Andric     typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
246044eb2f6SDimitry Andric     // Keep track of Outlined Functions and the basic block they're called from.
247044eb2f6SDimitry Andric     SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
248044eb2f6SDimitry Andric 
2497c7aba6eSDimitry Andric     // ClonedFunc is inlined in one of its callers after function
2507c7aba6eSDimitry Andric     // outlining.
2517c7aba6eSDimitry Andric     bool IsFunctionInlined = false;
2527c7aba6eSDimitry Andric     // The cost of the region to be outlined.
253344a3780SDimitry Andric     InstructionCost OutlinedRegionCost = 0;
254044eb2f6SDimitry Andric     // ClonedOI is specific to outlining non-early return blocks.
2557c7aba6eSDimitry Andric     std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
256044eb2f6SDimitry Andric     // ClonedOMRI is specific to outlining cold regions.
257044eb2f6SDimitry Andric     std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
2587c7aba6eSDimitry Andric     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
259044eb2f6SDimitry Andric     OptimizationRemarkEmitter &ORE;
260e6d15924SDimitry Andric     function_ref<AssumptionCache *(Function &)> LookupAC;
261b60736ecSDimitry Andric     function_ref<TargetTransformInfo &(Function &)> GetTTI;
2627c7aba6eSDimitry Andric   };
2637c7aba6eSDimitry Andric 
264a303c417SDimitry Andric private:
265a303c417SDimitry Andric   int NumPartialInlining = 0;
266cfca06d7SDimitry Andric   function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
267e6d15924SDimitry Andric   function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
268cfca06d7SDimitry Andric   function_ref<TargetTransformInfo &(Function &)> GetTTI;
269cfca06d7SDimitry Andric   function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
270cfca06d7SDimitry Andric   function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
271cfca06d7SDimitry Andric   ProfileSummaryInfo &PSI;
272a303c417SDimitry Andric 
2736b3f41edSDimitry Andric   // Return the frequency of the OutlininingBB relative to F's entry point.
2746b3f41edSDimitry Andric   // The result is no larger than 1 and is represented using BP.
2756b3f41edSDimitry Andric   // (Note that the outlined region's 'head' block can only have incoming
2766b3f41edSDimitry Andric   // edges from the guarding entry blocks).
277b60736ecSDimitry Andric   BranchProbability
278b60736ecSDimitry Andric   getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
2796b3f41edSDimitry Andric 
280cfca06d7SDimitry Andric   // Return true if the callee of CB should be partially inlined with
2816b3f41edSDimitry Andric   // profit.
282cfca06d7SDimitry Andric   bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
283eb11fae6SDimitry Andric                            BlockFrequency WeightedOutliningRcost,
284b60736ecSDimitry Andric                            OptimizationRemarkEmitter &ORE) const;
2856b3f41edSDimitry Andric 
2866b3f41edSDimitry Andric   // Try to inline DuplicateFunction (cloned from F with call to
2876b3f41edSDimitry Andric   // the OutlinedFunction into its callers. Return true
2886b3f41edSDimitry Andric   // if there is any successful inlining.
2897c7aba6eSDimitry Andric   bool tryPartialInline(FunctionCloner &Cloner);
2906b3f41edSDimitry Andric 
2916b3f41edSDimitry Andric   // Compute the mapping from use site of DuplicationFunction to the enclosing
2926b3f41edSDimitry Andric   // BB's profile count.
293b60736ecSDimitry Andric   void
294b60736ecSDimitry Andric   computeCallsiteToProfCountMap(Function *DuplicateFunction,
295b60736ecSDimitry Andric                                 DenseMap<User *, uint64_t> &SiteCountMap) const;
2966b3f41edSDimitry Andric 
isLimitReached__anonb7b3c81f0111::PartialInlinerImpl297b60736ecSDimitry Andric   bool isLimitReached() const {
29812f3ca4cSDimitry Andric     return (MaxNumPartialInlining != -1 &&
29912f3ca4cSDimitry Andric             NumPartialInlining >= MaxNumPartialInlining);
30012f3ca4cSDimitry Andric   }
3016b3f41edSDimitry Andric 
getSupportedCallBase__anonb7b3c81f0111::PartialInlinerImpl302cfca06d7SDimitry Andric   static CallBase *getSupportedCallBase(User *U) {
303cfca06d7SDimitry Andric     if (isa<CallInst>(U) || isa<InvokeInst>(U))
304cfca06d7SDimitry Andric       return cast<CallBase>(U);
3056b3f41edSDimitry Andric     llvm_unreachable("All uses must be calls");
306cfca06d7SDimitry Andric     return nullptr;
3076b3f41edSDimitry Andric   }
3086b3f41edSDimitry Andric 
getOneCallSiteTo__anonb7b3c81f0111::PartialInlinerImpl309b60736ecSDimitry Andric   static CallBase *getOneCallSiteTo(Function &F) {
310b60736ecSDimitry Andric     User *User = *F.user_begin();
311cfca06d7SDimitry Andric     return getSupportedCallBase(User);
3126b3f41edSDimitry Andric   }
3136b3f41edSDimitry Andric 
getOneDebugLoc__anonb7b3c81f0111::PartialInlinerImpl314b60736ecSDimitry Andric   std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
315cfca06d7SDimitry Andric     CallBase *CB = getOneCallSiteTo(F);
316cfca06d7SDimitry Andric     DebugLoc DLoc = CB->getDebugLoc();
317cfca06d7SDimitry Andric     BasicBlock *Block = CB->getParent();
3186b3f41edSDimitry Andric     return std::make_tuple(DLoc, Block);
3196b3f41edSDimitry Andric   }
3206b3f41edSDimitry Andric 
3216b3f41edSDimitry Andric   // Returns the costs associated with function outlining:
3226b3f41edSDimitry Andric   // - The first value is the non-weighted runtime cost for making the call
3237c7aba6eSDimitry Andric   //   to the outlined function, including the addtional  setup cost in the
3247c7aba6eSDimitry Andric   //    outlined function itself;
3256b3f41edSDimitry Andric   // - The second value is the estimated size of the new call sequence in
3267c7aba6eSDimitry Andric   //   basic block Cloner.OutliningCallBB;
327344a3780SDimitry Andric   std::tuple<InstructionCost, InstructionCost>
328344a3780SDimitry Andric   computeOutliningCosts(FunctionCloner &Cloner) const;
329044eb2f6SDimitry Andric 
3306b3f41edSDimitry Andric   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
3316b3f41edSDimitry Andric   // approximate both the size and runtime cost (Note that in the current
3326b3f41edSDimitry Andric   // inline cost analysis, there is no clear distinction there either).
333344a3780SDimitry Andric   static InstructionCost computeBBInlineCost(BasicBlock *BB,
334344a3780SDimitry Andric                                              TargetTransformInfo *TTI);
3356b3f41edSDimitry Andric 
336b60736ecSDimitry Andric   std::unique_ptr<FunctionOutliningInfo>
337b60736ecSDimitry Andric   computeOutliningInfo(Function &F) const;
338b60736ecSDimitry Andric 
339044eb2f6SDimitry Andric   std::unique_ptr<FunctionOutliningMultiRegionInfo>
340b60736ecSDimitry Andric   computeOutliningColdRegionsInfo(Function &F,
341b60736ecSDimitry Andric                                   OptimizationRemarkEmitter &ORE) const;
342b915e9e0SDimitry Andric };
343a303c417SDimitry Andric 
344044eb2f6SDimitry Andric } // end anonymous namespace
345044eb2f6SDimitry Andric 
346044eb2f6SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function & F,OptimizationRemarkEmitter & ORE) const347b60736ecSDimitry Andric PartialInlinerImpl::computeOutliningColdRegionsInfo(
348b60736ecSDimitry Andric     Function &F, OptimizationRemarkEmitter &ORE) const {
349b60736ecSDimitry Andric   BasicBlock *EntryBlock = &F.front();
350044eb2f6SDimitry Andric 
351b60736ecSDimitry Andric   DominatorTree DT(F);
352044eb2f6SDimitry Andric   LoopInfo LI(DT);
353b60736ecSDimitry Andric   BranchProbabilityInfo BPI(F, LI);
354044eb2f6SDimitry Andric   std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
355044eb2f6SDimitry Andric   BlockFrequencyInfo *BFI;
356044eb2f6SDimitry Andric   if (!GetBFI) {
357b60736ecSDimitry Andric     ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
358044eb2f6SDimitry Andric     BFI = ScopedBFI.get();
359044eb2f6SDimitry Andric   } else
360b60736ecSDimitry Andric     BFI = &(GetBFI(F));
361044eb2f6SDimitry Andric 
362044eb2f6SDimitry Andric   // Return if we don't have profiling information.
363cfca06d7SDimitry Andric   if (!PSI.hasInstrumentationProfile())
364044eb2f6SDimitry Andric     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
365044eb2f6SDimitry Andric 
366044eb2f6SDimitry Andric   std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
3671d5ae102SDimitry Andric       std::make_unique<FunctionOutliningMultiRegionInfo>();
368044eb2f6SDimitry Andric 
369044eb2f6SDimitry Andric   auto IsSingleExit =
370044eb2f6SDimitry Andric       [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
371044eb2f6SDimitry Andric     BasicBlock *ExitBlock = nullptr;
372044eb2f6SDimitry Andric     for (auto *Block : BlockList) {
373344a3780SDimitry Andric       for (BasicBlock *Succ : successors(Block)) {
374344a3780SDimitry Andric         if (!is_contained(BlockList, Succ)) {
375044eb2f6SDimitry Andric           if (ExitBlock) {
376044eb2f6SDimitry Andric             ORE.emit([&]() {
377044eb2f6SDimitry Andric               return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
378344a3780SDimitry Andric                                               &Succ->front())
379044eb2f6SDimitry Andric                      << "Region dominated by "
380044eb2f6SDimitry Andric                      << ore::NV("Block", BlockList.front()->getName())
381044eb2f6SDimitry Andric                      << " has more than one region exit edge.";
382044eb2f6SDimitry Andric             });
383044eb2f6SDimitry Andric             return nullptr;
384b60736ecSDimitry Andric           }
385b60736ecSDimitry Andric 
386044eb2f6SDimitry Andric           ExitBlock = Block;
387044eb2f6SDimitry Andric         }
388044eb2f6SDimitry Andric       }
389044eb2f6SDimitry Andric     }
390044eb2f6SDimitry Andric     return ExitBlock;
391044eb2f6SDimitry Andric   };
392044eb2f6SDimitry Andric 
393044eb2f6SDimitry Andric   auto BBProfileCount = [BFI](BasicBlock *BB) {
394145449b1SDimitry Andric     return BFI->getBlockProfileCount(BB).value_or(0);
395044eb2f6SDimitry Andric   };
396044eb2f6SDimitry Andric 
397044eb2f6SDimitry Andric   // Use the same computeBBInlineCost function to compute the cost savings of
398044eb2f6SDimitry Andric   // the outlining the candidate region.
399b60736ecSDimitry Andric   TargetTransformInfo *FTTI = &GetTTI(F);
400344a3780SDimitry Andric   InstructionCost OverallFunctionCost = 0;
401b60736ecSDimitry Andric   for (auto &BB : F)
402b60736ecSDimitry Andric     OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
403044eb2f6SDimitry Andric 
404b60736ecSDimitry Andric   LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
405b60736ecSDimitry Andric                     << "\n";);
406b60736ecSDimitry Andric 
407344a3780SDimitry Andric   InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
408344a3780SDimitry Andric       [&](auto Cost) { return Cost * MinRegionSizeRatio; });
409344a3780SDimitry Andric 
410044eb2f6SDimitry Andric   BranchProbability MinBranchProbability(
411044eb2f6SDimitry Andric       static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
412044eb2f6SDimitry Andric       MinBlockCounterExecution);
413044eb2f6SDimitry Andric   bool ColdCandidateFound = false;
414044eb2f6SDimitry Andric   BasicBlock *CurrEntry = EntryBlock;
415044eb2f6SDimitry Andric   std::vector<BasicBlock *> DFS;
416044eb2f6SDimitry Andric   DenseMap<BasicBlock *, bool> VisitedMap;
417044eb2f6SDimitry Andric   DFS.push_back(CurrEntry);
418044eb2f6SDimitry Andric   VisitedMap[CurrEntry] = true;
419b60736ecSDimitry Andric 
420044eb2f6SDimitry Andric   // Use Depth First Search on the basic blocks to find CFG edges that are
421044eb2f6SDimitry Andric   // considered cold.
422044eb2f6SDimitry Andric   // Cold regions considered must also have its inline cost compared to the
423044eb2f6SDimitry Andric   // overall inline cost of the original function.  The region is outlined only
424044eb2f6SDimitry Andric   // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
425044eb2f6SDimitry Andric   // more.
426044eb2f6SDimitry Andric   while (!DFS.empty()) {
427b60736ecSDimitry Andric     auto *ThisBB = DFS.back();
428044eb2f6SDimitry Andric     DFS.pop_back();
429044eb2f6SDimitry Andric     // Only consider regions with predecessor blocks that are considered
430044eb2f6SDimitry Andric     // not-cold (default: part of the top 99.99% of all block counters)
431044eb2f6SDimitry Andric     // AND greater than our minimum block execution count (default: 100).
432b60736ecSDimitry Andric     if (PSI.isColdBlock(ThisBB, BFI) ||
433b60736ecSDimitry Andric         BBProfileCount(ThisBB) < MinBlockCounterExecution)
434044eb2f6SDimitry Andric       continue;
435b60736ecSDimitry Andric     for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
436044eb2f6SDimitry Andric       if (VisitedMap[*SI])
437044eb2f6SDimitry Andric         continue;
438044eb2f6SDimitry Andric       VisitedMap[*SI] = true;
439044eb2f6SDimitry Andric       DFS.push_back(*SI);
440044eb2f6SDimitry Andric       // If branch isn't cold, we skip to the next one.
441b60736ecSDimitry Andric       BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
442044eb2f6SDimitry Andric       if (SuccProb > MinBranchProbability)
443044eb2f6SDimitry Andric         continue;
444b60736ecSDimitry Andric 
445b60736ecSDimitry Andric       LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
446b60736ecSDimitry Andric                         << SI->getName()
447b60736ecSDimitry Andric                         << "\nBranch Probability = " << SuccProb << "\n";);
448b60736ecSDimitry Andric 
449044eb2f6SDimitry Andric       SmallVector<BasicBlock *, 8> DominateVector;
450044eb2f6SDimitry Andric       DT.getDescendants(*SI, DominateVector);
451b60736ecSDimitry Andric       assert(!DominateVector.empty() &&
452b60736ecSDimitry Andric              "SI should be reachable and have at least itself as descendant");
453b60736ecSDimitry Andric 
454044eb2f6SDimitry Andric       // We can only outline single entry regions (for now).
455b60736ecSDimitry Andric       if (!DominateVector.front()->hasNPredecessors(1)) {
456b60736ecSDimitry Andric         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
457b60736ecSDimitry Andric                           << " doesn't have a single predecessor in the "
458b60736ecSDimitry Andric                              "dominator tree\n";);
459044eb2f6SDimitry Andric         continue;
460b60736ecSDimitry Andric       }
461b60736ecSDimitry Andric 
462044eb2f6SDimitry Andric       BasicBlock *ExitBlock = nullptr;
463044eb2f6SDimitry Andric       // We can only outline single exit regions (for now).
464b60736ecSDimitry Andric       if (!(ExitBlock = IsSingleExit(DominateVector))) {
465b60736ecSDimitry Andric         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
466b60736ecSDimitry Andric                           << " doesn't have a unique successor\n";);
467044eb2f6SDimitry Andric         continue;
468b60736ecSDimitry Andric       }
469b60736ecSDimitry Andric 
470344a3780SDimitry Andric       InstructionCost OutlineRegionCost = 0;
471044eb2f6SDimitry Andric       for (auto *BB : DominateVector)
472b60736ecSDimitry Andric         OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
473044eb2f6SDimitry Andric 
474b60736ecSDimitry Andric       LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
475b60736ecSDimitry Andric                         << "\n";);
476044eb2f6SDimitry Andric 
477b60736ecSDimitry Andric       if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
478044eb2f6SDimitry Andric         ORE.emit([&]() {
479044eb2f6SDimitry Andric           return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
480044eb2f6SDimitry Andric                                             &SI->front())
481b60736ecSDimitry Andric                  << ore::NV("Callee", &F)
482b60736ecSDimitry Andric                  << " inline cost-savings smaller than "
483044eb2f6SDimitry Andric                  << ore::NV("Cost", MinOutlineRegionCost);
484044eb2f6SDimitry Andric         });
485b60736ecSDimitry Andric 
486b60736ecSDimitry Andric         LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
487b60736ecSDimitry Andric                           << MinOutlineRegionCost << "\n";);
488044eb2f6SDimitry Andric         continue;
489044eb2f6SDimitry Andric       }
490b60736ecSDimitry Andric 
491044eb2f6SDimitry Andric       // For now, ignore blocks that belong to a SISE region that is a
492044eb2f6SDimitry Andric       // candidate for outlining.  In the future, we may want to look
493044eb2f6SDimitry Andric       // at inner regions because the outer region may have live-exit
494044eb2f6SDimitry Andric       // variables.
495044eb2f6SDimitry Andric       for (auto *BB : DominateVector)
496044eb2f6SDimitry Andric         VisitedMap[BB] = true;
497b60736ecSDimitry Andric 
498044eb2f6SDimitry Andric       // ReturnBlock here means the block after the outline call
499044eb2f6SDimitry Andric       BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
500044eb2f6SDimitry Andric       FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
501044eb2f6SDimitry Andric           DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
502044eb2f6SDimitry Andric       OutliningInfo->ORI.push_back(RegInfo);
503b60736ecSDimitry Andric       LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
504b60736ecSDimitry Andric                         << DominateVector.front()->getName() << "\n";);
505044eb2f6SDimitry Andric       ColdCandidateFound = true;
506044eb2f6SDimitry Andric       NumColdRegionsFound++;
507044eb2f6SDimitry Andric     }
508044eb2f6SDimitry Andric   }
509b60736ecSDimitry Andric 
510044eb2f6SDimitry Andric   if (ColdCandidateFound)
511044eb2f6SDimitry Andric     return OutliningInfo;
512b60736ecSDimitry Andric 
513044eb2f6SDimitry Andric   return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
5141a82d4c0SDimitry Andric }
515600c6fa1SEd Schouten 
516a303c417SDimitry Andric std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function & F) const517b60736ecSDimitry Andric PartialInlinerImpl::computeOutliningInfo(Function &F) const {
518b60736ecSDimitry Andric   BasicBlock *EntryBlock = &F.front();
519b915e9e0SDimitry Andric   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
52059850d08SRoman Divacky   if (!BR || BR->isUnconditional())
521a303c417SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
522600c6fa1SEd Schouten 
523a303c417SDimitry Andric   // Returns true if Succ is BB's successor
524a303c417SDimitry Andric   auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
525a303c417SDimitry Andric     return is_contained(successors(BB), Succ);
526a303c417SDimitry Andric   };
527a303c417SDimitry Andric 
528a303c417SDimitry Andric   auto IsReturnBlock = [](BasicBlock *BB) {
529d8e91e46SDimitry Andric     Instruction *TI = BB->getTerminator();
530a303c417SDimitry Andric     return isa<ReturnInst>(TI);
531a303c417SDimitry Andric   };
532a303c417SDimitry Andric 
5336b3f41edSDimitry Andric   auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
534a303c417SDimitry Andric     if (IsReturnBlock(Succ1))
535a303c417SDimitry Andric       return std::make_tuple(Succ1, Succ2);
536a303c417SDimitry Andric     if (IsReturnBlock(Succ2))
537a303c417SDimitry Andric       return std::make_tuple(Succ2, Succ1);
538a303c417SDimitry Andric 
539a303c417SDimitry Andric     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
540a303c417SDimitry Andric   };
541a303c417SDimitry Andric 
542a303c417SDimitry Andric   // Detect a triangular shape:
5436b3f41edSDimitry Andric   auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
544a303c417SDimitry Andric     if (IsSuccessor(Succ1, Succ2))
545a303c417SDimitry Andric       return std::make_tuple(Succ1, Succ2);
546a303c417SDimitry Andric     if (IsSuccessor(Succ2, Succ1))
547a303c417SDimitry Andric       return std::make_tuple(Succ2, Succ1);
548a303c417SDimitry Andric 
549a303c417SDimitry Andric     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
550a303c417SDimitry Andric   };
551a303c417SDimitry Andric 
552a303c417SDimitry Andric   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
5531d5ae102SDimitry Andric       std::make_unique<FunctionOutliningInfo>();
554a303c417SDimitry Andric 
555a303c417SDimitry Andric   BasicBlock *CurrEntry = EntryBlock;
556a303c417SDimitry Andric   bool CandidateFound = false;
557a303c417SDimitry Andric   do {
558a303c417SDimitry Andric     // The number of blocks to be inlined has already reached
559a303c417SDimitry Andric     // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
560a303c417SDimitry Andric     // disables partial inlining for the function.
561b60736ecSDimitry Andric     if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
562a303c417SDimitry Andric       break;
563a303c417SDimitry Andric 
564eb11fae6SDimitry Andric     if (succ_size(CurrEntry) != 2)
565a303c417SDimitry Andric       break;
566a303c417SDimitry Andric 
567a303c417SDimitry Andric     BasicBlock *Succ1 = *succ_begin(CurrEntry);
568a303c417SDimitry Andric     BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
569a303c417SDimitry Andric 
570a303c417SDimitry Andric     BasicBlock *ReturnBlock, *NonReturnBlock;
571a303c417SDimitry Andric     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
572a303c417SDimitry Andric 
573a303c417SDimitry Andric     if (ReturnBlock) {
574a303c417SDimitry Andric       OutliningInfo->Entries.push_back(CurrEntry);
575a303c417SDimitry Andric       OutliningInfo->ReturnBlock = ReturnBlock;
576a303c417SDimitry Andric       OutliningInfo->NonReturnBlock = NonReturnBlock;
577a303c417SDimitry Andric       CandidateFound = true;
578a303c417SDimitry Andric       break;
5795a5ac124SDimitry Andric     }
580600c6fa1SEd Schouten 
581b60736ecSDimitry Andric     BasicBlock *CommSucc, *OtherSucc;
582a303c417SDimitry Andric     std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
583a303c417SDimitry Andric 
584a303c417SDimitry Andric     if (!CommSucc)
585a303c417SDimitry Andric       break;
586a303c417SDimitry Andric 
587a303c417SDimitry Andric     OutliningInfo->Entries.push_back(CurrEntry);
588a303c417SDimitry Andric     CurrEntry = OtherSucc;
589a303c417SDimitry Andric   } while (true);
590a303c417SDimitry Andric 
591a303c417SDimitry Andric   if (!CandidateFound)
592a303c417SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
593a303c417SDimitry Andric 
594f65dcba8SDimitry Andric   // There should not be any successors (not in the entry set) other than
595a303c417SDimitry Andric   // {ReturnBlock, NonReturnBlock}
596b60736ecSDimitry Andric   assert(OutliningInfo->Entries[0] == &F.front() &&
5976b3f41edSDimitry Andric          "Function Entry must be the first in Entries vector");
598a303c417SDimitry Andric   DenseSet<BasicBlock *> Entries;
599a303c417SDimitry Andric   for (BasicBlock *E : OutliningInfo->Entries)
600a303c417SDimitry Andric     Entries.insert(E);
601a303c417SDimitry Andric 
602a303c417SDimitry Andric   // Returns true of BB has Predecessor which is not
603a303c417SDimitry Andric   // in Entries set.
604a303c417SDimitry Andric   auto HasNonEntryPred = [Entries](BasicBlock *BB) {
605b60736ecSDimitry Andric     for (auto *Pred : predecessors(BB)) {
606a303c417SDimitry Andric       if (!Entries.count(Pred))
607a303c417SDimitry Andric         return true;
608a303c417SDimitry Andric     }
609a303c417SDimitry Andric     return false;
610a303c417SDimitry Andric   };
611a303c417SDimitry Andric   auto CheckAndNormalizeCandidate =
612a303c417SDimitry Andric       [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
613a303c417SDimitry Andric         for (BasicBlock *E : OutliningInfo->Entries) {
614b60736ecSDimitry Andric           for (auto *Succ : successors(E)) {
615a303c417SDimitry Andric             if (Entries.count(Succ))
616a303c417SDimitry Andric               continue;
617a303c417SDimitry Andric             if (Succ == OutliningInfo->ReturnBlock)
618a303c417SDimitry Andric               OutliningInfo->ReturnBlockPreds.push_back(E);
619a303c417SDimitry Andric             else if (Succ != OutliningInfo->NonReturnBlock)
620a303c417SDimitry Andric               return false;
621a303c417SDimitry Andric           }
622a303c417SDimitry Andric           // There should not be any outside incoming edges either:
623a303c417SDimitry Andric           if (HasNonEntryPred(E))
624a303c417SDimitry Andric             return false;
625a303c417SDimitry Andric         }
626a303c417SDimitry Andric         return true;
627a303c417SDimitry Andric       };
628a303c417SDimitry Andric 
629a303c417SDimitry Andric   if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
630a303c417SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
631a303c417SDimitry Andric 
632a303c417SDimitry Andric   // Now further growing the candidate's inlining region by
633a303c417SDimitry Andric   // peeling off dominating blocks from the outlining region:
634b60736ecSDimitry Andric   while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
635a303c417SDimitry Andric     BasicBlock *Cand = OutliningInfo->NonReturnBlock;
636eb11fae6SDimitry Andric     if (succ_size(Cand) != 2)
637a303c417SDimitry Andric       break;
638a303c417SDimitry Andric 
639a303c417SDimitry Andric     if (HasNonEntryPred(Cand))
640a303c417SDimitry Andric       break;
641a303c417SDimitry Andric 
642a303c417SDimitry Andric     BasicBlock *Succ1 = *succ_begin(Cand);
643a303c417SDimitry Andric     BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
644a303c417SDimitry Andric 
645a303c417SDimitry Andric     BasicBlock *ReturnBlock, *NonReturnBlock;
646a303c417SDimitry Andric     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
647a303c417SDimitry Andric     if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
648a303c417SDimitry Andric       break;
649a303c417SDimitry Andric 
650a303c417SDimitry Andric     if (NonReturnBlock->getSinglePredecessor() != Cand)
651a303c417SDimitry Andric       break;
652a303c417SDimitry Andric 
653a303c417SDimitry Andric     // Now grow and update OutlininigInfo:
654a303c417SDimitry Andric     OutliningInfo->Entries.push_back(Cand);
655a303c417SDimitry Andric     OutliningInfo->NonReturnBlock = NonReturnBlock;
656a303c417SDimitry Andric     OutliningInfo->ReturnBlockPreds.push_back(Cand);
657a303c417SDimitry Andric     Entries.insert(Cand);
658a303c417SDimitry Andric   }
659a303c417SDimitry Andric 
660a303c417SDimitry Andric   return OutliningInfo;
661a303c417SDimitry Andric }
662a303c417SDimitry Andric 
663706b4fc4SDimitry Andric // Check if there is PGO data or user annotated branch data:
hasProfileData(const Function & F,const FunctionOutliningInfo & OI)664b60736ecSDimitry Andric static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
665b60736ecSDimitry Andric   if (F.hasProfileData())
6666b3f41edSDimitry Andric     return true;
6676b3f41edSDimitry Andric   // Now check if any of the entry block has MD_prof data:
668b60736ecSDimitry Andric   for (auto *E : OI.Entries) {
6696b3f41edSDimitry Andric     BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
6706b3f41edSDimitry Andric     if (!BR || BR->isUnconditional())
6716b3f41edSDimitry Andric       continue;
672e3b55780SDimitry Andric     if (hasBranchWeightMD(*BR))
6736b3f41edSDimitry Andric       return true;
6746b3f41edSDimitry Andric   }
6756b3f41edSDimitry Andric   return false;
6766b3f41edSDimitry Andric }
6776b3f41edSDimitry Andric 
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner) const678b60736ecSDimitry Andric BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
679b60736ecSDimitry Andric     FunctionCloner &Cloner) const {
680044eb2f6SDimitry Andric   BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
6816b3f41edSDimitry Andric   auto EntryFreq =
6827c7aba6eSDimitry Andric       Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
6837c7aba6eSDimitry Andric   auto OutliningCallFreq =
684044eb2f6SDimitry Andric       Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
685044eb2f6SDimitry Andric   // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
686044eb2f6SDimitry Andric   // we outlined any regions, so we may encounter situations where the
687044eb2f6SDimitry Andric   // OutliningCallFreq is *slightly* bigger than the EntryFreq.
688b60736ecSDimitry Andric   if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
689044eb2f6SDimitry Andric     OutliningCallFreq = EntryFreq;
690b60736ecSDimitry Andric 
691044eb2f6SDimitry Andric   auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
692044eb2f6SDimitry Andric       OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
6936b3f41edSDimitry Andric 
694145449b1SDimitry Andric   if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
6956b3f41edSDimitry Andric     return OutlineRegionRelFreq;
6966b3f41edSDimitry Andric 
697d288ef4cSDimitry Andric   // When profile data is not available, we need to be conservative in
698d288ef4cSDimitry Andric   // estimating the overall savings. Static branch prediction can usually
699d288ef4cSDimitry Andric   // guess the branch direction right (taken/non-taken), but the guessed
700d288ef4cSDimitry Andric   // branch probability is usually not biased enough. In case when the
701d288ef4cSDimitry Andric   // outlined region is predicted to be likely, its probability needs
702d288ef4cSDimitry Andric   // to be made higher (more biased) to not under-estimate the cost of
703d288ef4cSDimitry Andric   // function outlining. On the other hand, if the outlined region
704d288ef4cSDimitry Andric   // is predicted to be less likely, the predicted probablity is usually
705d288ef4cSDimitry Andric   // higher than the actual. For instance, the actual probability of the
706d288ef4cSDimitry Andric   // less likely target is only 5%, but the guessed probablity can be
707e3b55780SDimitry Andric   // 40%. In the latter case, there is no need for further adjustment.
708d288ef4cSDimitry Andric   // FIXME: add an option for this.
709d288ef4cSDimitry Andric   if (OutlineRegionRelFreq < BranchProbability(45, 100))
710d288ef4cSDimitry Andric     return OutlineRegionRelFreq;
711d288ef4cSDimitry Andric 
712d288ef4cSDimitry Andric   OutlineRegionRelFreq = std::max(
713d288ef4cSDimitry Andric       OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
7146b3f41edSDimitry Andric 
7156b3f41edSDimitry Andric   return OutlineRegionRelFreq;
7166b3f41edSDimitry Andric }
7176b3f41edSDimitry Andric 
shouldPartialInline(CallBase & CB,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE) const7186b3f41edSDimitry Andric bool PartialInlinerImpl::shouldPartialInline(
719cfca06d7SDimitry Andric     CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
720b60736ecSDimitry Andric     OptimizationRemarkEmitter &ORE) const {
721a303c417SDimitry Andric   using namespace ore;
722044eb2f6SDimitry Andric 
723cfca06d7SDimitry Andric   Function *Callee = CB.getCalledFunction();
7247c7aba6eSDimitry Andric   assert(Callee == Cloner.ClonedFunc);
7257c7aba6eSDimitry Andric 
726eb11fae6SDimitry Andric   if (SkipCostAnalysis)
727cfca06d7SDimitry Andric     return isInlineViable(*Callee).isSuccess();
728eb11fae6SDimitry Andric 
729cfca06d7SDimitry Andric   Function *Caller = CB.getCaller();
730cfca06d7SDimitry Andric   auto &CalleeTTI = GetTTI(*Callee);
731e6d15924SDimitry Andric   bool RemarksEnabled =
732e6d15924SDimitry Andric       Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
733e6d15924SDimitry Andric           DEBUG_TYPE);
734cfca06d7SDimitry Andric   InlineCost IC =
735cfca06d7SDimitry Andric       getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
736cfca06d7SDimitry Andric                     GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
737a303c417SDimitry Andric 
738a303c417SDimitry Andric   if (IC.isAlways()) {
739044eb2f6SDimitry Andric     ORE.emit([&]() {
740cfca06d7SDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
7417c7aba6eSDimitry Andric              << NV("Callee", Cloner.OrigFunc)
742044eb2f6SDimitry Andric              << " should always be fully inlined, not partially";
743044eb2f6SDimitry Andric     });
744a303c417SDimitry Andric     return false;
745a303c417SDimitry Andric   }
746a303c417SDimitry Andric 
747a303c417SDimitry Andric   if (IC.isNever()) {
748044eb2f6SDimitry Andric     ORE.emit([&]() {
749cfca06d7SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
7507c7aba6eSDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
751a303c417SDimitry Andric              << NV("Caller", Caller)
752044eb2f6SDimitry Andric              << " because it should never be inlined (cost=never)";
753044eb2f6SDimitry Andric     });
754a303c417SDimitry Andric     return false;
755a303c417SDimitry Andric   }
756a303c417SDimitry Andric 
757a303c417SDimitry Andric   if (!IC) {
758044eb2f6SDimitry Andric     ORE.emit([&]() {
759cfca06d7SDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
7607c7aba6eSDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
761a303c417SDimitry Andric              << NV("Caller", Caller) << " because too costly to inline (cost="
762a303c417SDimitry Andric              << NV("Cost", IC.getCost()) << ", threshold="
763044eb2f6SDimitry Andric              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
764044eb2f6SDimitry Andric     });
765a303c417SDimitry Andric     return false;
766a303c417SDimitry Andric   }
767ac9a064cSDimitry Andric   const DataLayout &DL = Caller->getDataLayout();
7687c7aba6eSDimitry Andric 
7696b3f41edSDimitry Andric   // The savings of eliminating the call:
770b1c73532SDimitry Andric   int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
7716b3f41edSDimitry Andric   BlockFrequency NormWeightedSavings(NonWeightedSavings);
7726b3f41edSDimitry Andric 
7736b3f41edSDimitry Andric   // Weighted saving is smaller than weighted cost, return false
7747c7aba6eSDimitry Andric   if (NormWeightedSavings < WeightedOutliningRcost) {
775044eb2f6SDimitry Andric     ORE.emit([&]() {
776044eb2f6SDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
777cfca06d7SDimitry Andric                                         &CB)
7787c7aba6eSDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
7796b3f41edSDimitry Andric              << NV("Caller", Caller) << " runtime overhead (overhead="
7807c7aba6eSDimitry Andric              << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
7816b3f41edSDimitry Andric              << ", savings="
782044eb2f6SDimitry Andric              << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
783044eb2f6SDimitry Andric              << ")"
784044eb2f6SDimitry Andric              << " of making the outlined call is too high";
785044eb2f6SDimitry Andric     });
7866b3f41edSDimitry Andric 
7876b3f41edSDimitry Andric     return false;
7886b3f41edSDimitry Andric   }
789a303c417SDimitry Andric 
790044eb2f6SDimitry Andric   ORE.emit([&]() {
791cfca06d7SDimitry Andric     return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
7927c7aba6eSDimitry Andric            << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
793a303c417SDimitry Andric            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
794a303c417SDimitry Andric            << " (threshold="
795044eb2f6SDimitry Andric            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
796044eb2f6SDimitry Andric   });
797a303c417SDimitry Andric   return true;
798a303c417SDimitry Andric }
799a303c417SDimitry Andric 
8006b3f41edSDimitry Andric // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
8016b3f41edSDimitry Andric // For now use a simplified version. The returned 'InlineCost' will be used
8026b3f41edSDimitry Andric // to esimate the size cost as well as runtime cost of the BB.
803344a3780SDimitry Andric InstructionCost
computeBBInlineCost(BasicBlock * BB,TargetTransformInfo * TTI)804344a3780SDimitry Andric PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
805b60736ecSDimitry Andric                                         TargetTransformInfo *TTI) {
806344a3780SDimitry Andric   InstructionCost InlineCost = 0;
807ac9a064cSDimitry Andric   const DataLayout &DL = BB->getDataLayout();
808e3b55780SDimitry Andric   int InstrCost = InlineConstants::getInstrCost();
809d8e91e46SDimitry Andric   for (Instruction &I : BB->instructionsWithoutDebug()) {
810d8e91e46SDimitry Andric     // Skip free instructions.
811d8e91e46SDimitry Andric     switch (I.getOpcode()) {
812d288ef4cSDimitry Andric     case Instruction::BitCast:
813d288ef4cSDimitry Andric     case Instruction::PtrToInt:
814d288ef4cSDimitry Andric     case Instruction::IntToPtr:
815d288ef4cSDimitry Andric     case Instruction::Alloca:
816d8e91e46SDimitry Andric     case Instruction::PHI:
817d288ef4cSDimitry Andric       continue;
818d288ef4cSDimitry Andric     case Instruction::GetElementPtr:
819d8e91e46SDimitry Andric       if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
820d288ef4cSDimitry Andric         continue;
821c7dac04cSDimitry Andric       break;
822d288ef4cSDimitry Andric     default:
823d288ef4cSDimitry Andric       break;
824d288ef4cSDimitry Andric     }
825d288ef4cSDimitry Andric 
826d8e91e46SDimitry Andric     if (I.isLifetimeStartOrEnd())
827d288ef4cSDimitry Andric       continue;
828d288ef4cSDimitry Andric 
829b60736ecSDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
830b60736ecSDimitry Andric       Intrinsic::ID IID = II->getIntrinsicID();
831b60736ecSDimitry Andric       SmallVector<Type *, 4> Tys;
832b60736ecSDimitry Andric       FastMathFlags FMF;
833b60736ecSDimitry Andric       for (Value *Val : II->args())
834b60736ecSDimitry Andric         Tys.push_back(Val->getType());
835b60736ecSDimitry Andric 
836b60736ecSDimitry Andric       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
837b60736ecSDimitry Andric         FMF = FPMO->getFastMathFlags();
838b60736ecSDimitry Andric 
839b60736ecSDimitry Andric       IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
840b60736ecSDimitry Andric       InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
841b60736ecSDimitry Andric       continue;
842b60736ecSDimitry Andric     }
843b60736ecSDimitry Andric 
844d8e91e46SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
845b1c73532SDimitry Andric       InlineCost += getCallsiteCost(*TTI, *CI, DL);
8466b3f41edSDimitry Andric       continue;
8476b3f41edSDimitry Andric     }
8486b3f41edSDimitry Andric 
849d8e91e46SDimitry Andric     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
850b1c73532SDimitry Andric       InlineCost += getCallsiteCost(*TTI, *II, DL);
8516b3f41edSDimitry Andric       continue;
8526b3f41edSDimitry Andric     }
8536b3f41edSDimitry Andric 
854d8e91e46SDimitry Andric     if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
855e3b55780SDimitry Andric       InlineCost += (SI->getNumCases() + 1) * InstrCost;
8566b3f41edSDimitry Andric       continue;
8576b3f41edSDimitry Andric     }
858e3b55780SDimitry Andric     InlineCost += InstrCost;
8596b3f41edSDimitry Andric   }
860344a3780SDimitry Andric 
8616b3f41edSDimitry Andric   return InlineCost;
8626b3f41edSDimitry Andric }
8636b3f41edSDimitry Andric 
864344a3780SDimitry Andric std::tuple<InstructionCost, InstructionCost>
computeOutliningCosts(FunctionCloner & Cloner) const865b60736ecSDimitry Andric PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
866344a3780SDimitry Andric   InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
867044eb2f6SDimitry Andric   for (auto FuncBBPair : Cloner.OutlinedFunctions) {
868044eb2f6SDimitry Andric     Function *OutlinedFunc = FuncBBPair.first;
869044eb2f6SDimitry Andric     BasicBlock* OutliningCallBB = FuncBBPair.second;
8706b3f41edSDimitry Andric     // Now compute the cost of the call sequence to the outlined function
8716b3f41edSDimitry Andric     // 'OutlinedFunction' in BB 'OutliningCallBB':
872b60736ecSDimitry Andric     auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
873b60736ecSDimitry Andric     OutliningFuncCallCost +=
874b60736ecSDimitry Andric         computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
8756b3f41edSDimitry Andric 
8766b3f41edSDimitry Andric     // Now compute the cost of the extracted/outlined function itself:
877044eb2f6SDimitry Andric     for (BasicBlock &BB : *OutlinedFunc)
878b60736ecSDimitry Andric       OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
8796b3f41edSDimitry Andric   }
8807c7aba6eSDimitry Andric   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
8816b3f41edSDimitry Andric          "Outlined function cost should be no less than the outlined region");
882044eb2f6SDimitry Andric 
883d288ef4cSDimitry Andric   // The code extractor introduces a new root and exit stub blocks with
884d288ef4cSDimitry Andric   // additional unconditional branches. Those branches will be eliminated
885d288ef4cSDimitry Andric   // later with bb layout. The cost should be adjusted accordingly:
886044eb2f6SDimitry Andric   OutlinedFunctionCost -=
887e3b55780SDimitry Andric       2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
888d288ef4cSDimitry Andric 
889344a3780SDimitry Andric   InstructionCost OutliningRuntimeOverhead =
8907c7aba6eSDimitry Andric       OutliningFuncCallCost +
8917c7aba6eSDimitry Andric       (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
892344a3780SDimitry Andric       ExtraOutliningPenalty.getValue();
8936b3f41edSDimitry Andric 
8947c7aba6eSDimitry Andric   return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
8956b3f41edSDimitry Andric }
8966b3f41edSDimitry Andric 
8976b3f41edSDimitry Andric // Create the callsite to profile count map which is
8986b3f41edSDimitry Andric // used to update the original function's entry count,
8996b3f41edSDimitry Andric // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap) const9006b3f41edSDimitry Andric void PartialInlinerImpl::computeCallsiteToProfCountMap(
9016b3f41edSDimitry Andric     Function *DuplicateFunction,
902b60736ecSDimitry Andric     DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
9036b3f41edSDimitry Andric   std::vector<User *> Users(DuplicateFunction->user_begin(),
9046b3f41edSDimitry Andric                             DuplicateFunction->user_end());
9056b3f41edSDimitry Andric   Function *CurrentCaller = nullptr;
906ab44ce3dSDimitry Andric   std::unique_ptr<BlockFrequencyInfo> TempBFI;
9076b3f41edSDimitry Andric   BlockFrequencyInfo *CurrentCallerBFI = nullptr;
9086b3f41edSDimitry Andric 
9096b3f41edSDimitry Andric   auto ComputeCurrBFI = [&,this](Function *Caller) {
9106b3f41edSDimitry Andric       // For the old pass manager:
9116b3f41edSDimitry Andric       if (!GetBFI) {
9126b3f41edSDimitry Andric         DominatorTree DT(*Caller);
9136b3f41edSDimitry Andric         LoopInfo LI(DT);
9146b3f41edSDimitry Andric         BranchProbabilityInfo BPI(*Caller, LI);
915ab44ce3dSDimitry Andric         TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
916ab44ce3dSDimitry Andric         CurrentCallerBFI = TempBFI.get();
9176b3f41edSDimitry Andric       } else {
9186b3f41edSDimitry Andric         // New pass manager:
919cfca06d7SDimitry Andric         CurrentCallerBFI = &(GetBFI(*Caller));
9206b3f41edSDimitry Andric       }
9216b3f41edSDimitry Andric   };
9226b3f41edSDimitry Andric 
9236b3f41edSDimitry Andric   for (User *User : Users) {
9246f8fc217SDimitry Andric     // Don't bother with BlockAddress used by CallBr for asm goto.
9256f8fc217SDimitry Andric     if (isa<BlockAddress>(User))
9266f8fc217SDimitry Andric       continue;
927cfca06d7SDimitry Andric     CallBase *CB = getSupportedCallBase(User);
928cfca06d7SDimitry Andric     Function *Caller = CB->getCaller();
9296b3f41edSDimitry Andric     if (CurrentCaller != Caller) {
9306b3f41edSDimitry Andric       CurrentCaller = Caller;
9316b3f41edSDimitry Andric       ComputeCurrBFI(Caller);
9326b3f41edSDimitry Andric     } else {
9336b3f41edSDimitry Andric       assert(CurrentCallerBFI && "CallerBFI is not set");
9346b3f41edSDimitry Andric     }
935cfca06d7SDimitry Andric     BasicBlock *CallBB = CB->getParent();
9366b3f41edSDimitry Andric     auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
9376b3f41edSDimitry Andric     if (Count)
9386b3f41edSDimitry Andric       CallSiteToProfCountMap[User] = *Count;
9396b3f41edSDimitry Andric     else
9406b3f41edSDimitry Andric       CallSiteToProfCountMap[User] = 0;
9416b3f41edSDimitry Andric   }
9426b3f41edSDimitry Andric }
9436b3f41edSDimitry Andric 
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)944044eb2f6SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
945e6d15924SDimitry Andric     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
946b60736ecSDimitry Andric     function_ref<AssumptionCache *(Function &)> LookupAC,
947b60736ecSDimitry Andric     function_ref<TargetTransformInfo &(Function &)> GetTTI)
948b60736ecSDimitry Andric     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
9491d5ae102SDimitry Andric   ClonedOI = std::make_unique<FunctionOutliningInfo>();
9507c7aba6eSDimitry Andric 
9517c7aba6eSDimitry Andric   // Clone the function, so that we can hack away on it.
9527c7aba6eSDimitry Andric   ValueToValueMapTy VMap;
9537c7aba6eSDimitry Andric   ClonedFunc = CloneFunction(F, VMap);
9547c7aba6eSDimitry Andric 
9557c7aba6eSDimitry Andric   ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
9567c7aba6eSDimitry Andric   ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
957b60736ecSDimitry Andric   for (BasicBlock *BB : OI->Entries)
9587c7aba6eSDimitry Andric     ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
959b60736ecSDimitry Andric 
9607c7aba6eSDimitry Andric   for (BasicBlock *E : OI->ReturnBlockPreds) {
9617c7aba6eSDimitry Andric     BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
9627c7aba6eSDimitry Andric     ClonedOI->ReturnBlockPreds.push_back(NewE);
9637c7aba6eSDimitry Andric   }
9647c7aba6eSDimitry Andric   // Go ahead and update all uses to the duplicate, so that we can just
9657c7aba6eSDimitry Andric   // use the inliner functionality when we're done hacking.
9667c7aba6eSDimitry Andric   F->replaceAllUsesWith(ClonedFunc);
9677c7aba6eSDimitry Andric }
9687c7aba6eSDimitry Andric 
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)969044eb2f6SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
970044eb2f6SDimitry Andric     Function *F, FunctionOutliningMultiRegionInfo *OI,
971e6d15924SDimitry Andric     OptimizationRemarkEmitter &ORE,
972b60736ecSDimitry Andric     function_ref<AssumptionCache *(Function &)> LookupAC,
973b60736ecSDimitry Andric     function_ref<TargetTransformInfo &(Function &)> GetTTI)
974b60736ecSDimitry Andric     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
9751d5ae102SDimitry Andric   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
9767c7aba6eSDimitry Andric 
977044eb2f6SDimitry Andric   // Clone the function, so that we can hack away on it.
978044eb2f6SDimitry Andric   ValueToValueMapTy VMap;
979044eb2f6SDimitry Andric   ClonedFunc = CloneFunction(F, VMap);
980044eb2f6SDimitry Andric 
981044eb2f6SDimitry Andric   // Go through all Outline Candidate Regions and update all BasicBlock
982044eb2f6SDimitry Andric   // information.
9837fa27ce4SDimitry Andric   for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
984044eb2f6SDimitry Andric        OI->ORI) {
985044eb2f6SDimitry Andric     SmallVector<BasicBlock *, 8> Region;
986b60736ecSDimitry Andric     for (BasicBlock *BB : RegionInfo.Region)
987044eb2f6SDimitry Andric       Region.push_back(cast<BasicBlock>(VMap[BB]));
988b60736ecSDimitry Andric 
989044eb2f6SDimitry Andric     BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
990044eb2f6SDimitry Andric     BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
991044eb2f6SDimitry Andric     BasicBlock *NewReturnBlock = nullptr;
992044eb2f6SDimitry Andric     if (RegionInfo.ReturnBlock)
993044eb2f6SDimitry Andric       NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
994044eb2f6SDimitry Andric     FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
995044eb2f6SDimitry Andric         Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
996044eb2f6SDimitry Andric     ClonedOMRI->ORI.push_back(MappedRegionInfo);
997044eb2f6SDimitry Andric   }
998044eb2f6SDimitry Andric   // Go ahead and update all uses to the duplicate, so that we can just
999044eb2f6SDimitry Andric   // use the inliner functionality when we're done hacking.
1000044eb2f6SDimitry Andric   F->replaceAllUsesWith(ClonedFunc);
1001044eb2f6SDimitry Andric }
1002044eb2f6SDimitry Andric 
normalizeReturnBlock() const1003b60736ecSDimitry Andric void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1004b60736ecSDimitry Andric   auto GetFirstPHI = [](BasicBlock *BB) {
10057c7aba6eSDimitry Andric     BasicBlock::iterator I = BB->begin();
10067c7aba6eSDimitry Andric     PHINode *FirstPhi = nullptr;
10077c7aba6eSDimitry Andric     while (I != BB->end()) {
10087c7aba6eSDimitry Andric       PHINode *Phi = dyn_cast<PHINode>(I);
10097c7aba6eSDimitry Andric       if (!Phi)
10107c7aba6eSDimitry Andric         break;
10117c7aba6eSDimitry Andric       if (!FirstPhi) {
10127c7aba6eSDimitry Andric         FirstPhi = Phi;
10137c7aba6eSDimitry Andric         break;
10147c7aba6eSDimitry Andric       }
10157c7aba6eSDimitry Andric     }
10167c7aba6eSDimitry Andric     return FirstPhi;
10177c7aba6eSDimitry Andric   };
10187c7aba6eSDimitry Andric 
1019044eb2f6SDimitry Andric   // Shouldn't need to normalize PHIs if we're not outlining non-early return
1020044eb2f6SDimitry Andric   // blocks.
1021044eb2f6SDimitry Andric   if (!ClonedOI)
1022044eb2f6SDimitry Andric     return;
1023044eb2f6SDimitry Andric 
10247c7aba6eSDimitry Andric   // Special hackery is needed with PHI nodes that have inputs from more than
10257c7aba6eSDimitry Andric   // one extracted block.  For simplicity, just split the PHIs into a two-level
10267c7aba6eSDimitry Andric   // sequence of PHIs, some of which will go in the extracted region, and some
10277c7aba6eSDimitry Andric   // of which will go outside.
10287c7aba6eSDimitry Andric   BasicBlock *PreReturn = ClonedOI->ReturnBlock;
10297c7aba6eSDimitry Andric   // only split block when necessary:
1030b60736ecSDimitry Andric   PHINode *FirstPhi = GetFirstPHI(PreReturn);
10317c7aba6eSDimitry Andric   unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
10327c7aba6eSDimitry Andric 
10337c7aba6eSDimitry Andric   if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
10347c7aba6eSDimitry Andric     return;
10357c7aba6eSDimitry Andric 
10367c7aba6eSDimitry Andric   auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1037e3b55780SDimitry Andric     if (llvm::all_equal(PN->incoming_values()))
1038e3b55780SDimitry Andric       return PN->getIncomingValue(0);
10397c7aba6eSDimitry Andric     return nullptr;
10407c7aba6eSDimitry Andric   };
10417c7aba6eSDimitry Andric 
10427c7aba6eSDimitry Andric   ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
10437c7aba6eSDimitry Andric       ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
10447c7aba6eSDimitry Andric   BasicBlock::iterator I = PreReturn->begin();
1045b1c73532SDimitry Andric   BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
10467c7aba6eSDimitry Andric   SmallVector<Instruction *, 4> DeadPhis;
10477c7aba6eSDimitry Andric   while (I != PreReturn->end()) {
10487c7aba6eSDimitry Andric     PHINode *OldPhi = dyn_cast<PHINode>(I);
10497c7aba6eSDimitry Andric     if (!OldPhi)
10507c7aba6eSDimitry Andric       break;
10517c7aba6eSDimitry Andric 
10527c7aba6eSDimitry Andric     PHINode *RetPhi =
1053b1c73532SDimitry Andric         PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
1054b1c73532SDimitry Andric     RetPhi->insertBefore(Ins);
10557c7aba6eSDimitry Andric     OldPhi->replaceAllUsesWith(RetPhi);
1056b1c73532SDimitry Andric     Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
10577c7aba6eSDimitry Andric 
10587c7aba6eSDimitry Andric     RetPhi->addIncoming(&*I, PreReturn);
10597c7aba6eSDimitry Andric     for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
10607c7aba6eSDimitry Andric       RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
10617c7aba6eSDimitry Andric       OldPhi->removeIncomingValue(E);
10627c7aba6eSDimitry Andric     }
10637c7aba6eSDimitry Andric 
10647c7aba6eSDimitry Andric     // After incoming values splitting, the old phi may become trivial.
10657c7aba6eSDimitry Andric     // Keeping the trivial phi can introduce definition inside the outline
10667c7aba6eSDimitry Andric     // region which is live-out, causing necessary overhead (load, store
10677c7aba6eSDimitry Andric     // arg passing etc).
10687c7aba6eSDimitry Andric     if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
10697c7aba6eSDimitry Andric       OldPhi->replaceAllUsesWith(OldPhiVal);
10707c7aba6eSDimitry Andric       DeadPhis.push_back(OldPhi);
10717c7aba6eSDimitry Andric     }
10727c7aba6eSDimitry Andric     ++I;
10737c7aba6eSDimitry Andric   }
10747c7aba6eSDimitry Andric   for (auto *DP : DeadPhis)
10757c7aba6eSDimitry Andric     DP->eraseFromParent();
10767c7aba6eSDimitry Andric 
1077b60736ecSDimitry Andric   for (auto *E : ClonedOI->ReturnBlockPreds)
10787c7aba6eSDimitry Andric     E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
10797c7aba6eSDimitry Andric }
10807c7aba6eSDimitry Andric 
doMultiRegionFunctionOutlining()1081044eb2f6SDimitry Andric bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1082044eb2f6SDimitry Andric 
1083344a3780SDimitry Andric   auto ComputeRegionCost =
1084344a3780SDimitry Andric       [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1085344a3780SDimitry Andric     InstructionCost Cost = 0;
1086044eb2f6SDimitry Andric     for (BasicBlock* BB : Region)
1087b60736ecSDimitry Andric       Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1088044eb2f6SDimitry Andric     return Cost;
1089044eb2f6SDimitry Andric   };
1090044eb2f6SDimitry Andric 
1091044eb2f6SDimitry Andric   assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1092044eb2f6SDimitry Andric 
1093044eb2f6SDimitry Andric   if (ClonedOMRI->ORI.empty())
1094044eb2f6SDimitry Andric     return false;
1095044eb2f6SDimitry Andric 
1096044eb2f6SDimitry Andric   // The CodeExtractor needs a dominator tree.
1097044eb2f6SDimitry Andric   DominatorTree DT;
1098044eb2f6SDimitry Andric   DT.recalculate(*ClonedFunc);
1099044eb2f6SDimitry Andric 
1100044eb2f6SDimitry Andric   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1101044eb2f6SDimitry Andric   LoopInfo LI(DT);
1102044eb2f6SDimitry Andric   BranchProbabilityInfo BPI(*ClonedFunc, LI);
1103044eb2f6SDimitry Andric   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1104044eb2f6SDimitry Andric 
11051d5ae102SDimitry Andric   // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
11061d5ae102SDimitry Andric   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
11071d5ae102SDimitry Andric 
1108044eb2f6SDimitry Andric   SetVector<Value *> Inputs, Outputs, Sinks;
1109044eb2f6SDimitry Andric   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1110044eb2f6SDimitry Andric        ClonedOMRI->ORI) {
1111344a3780SDimitry Andric     InstructionCost CurrentOutlinedRegionCost =
1112344a3780SDimitry Andric         ComputeRegionCost(RegionInfo.Region);
1113044eb2f6SDimitry Andric 
1114044eb2f6SDimitry Andric     CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1115e6d15924SDimitry Andric                      ClonedFuncBFI.get(), &BPI,
1116e6d15924SDimitry Andric                      LookupAC(*RegionInfo.EntryBlock->getParent()),
1117e6d15924SDimitry Andric                      /* AllowVarargs */ false);
1118044eb2f6SDimitry Andric 
1119044eb2f6SDimitry Andric     CE.findInputsOutputs(Inputs, Outputs, Sinks);
1120044eb2f6SDimitry Andric 
1121b60736ecSDimitry Andric     LLVM_DEBUG({
1122044eb2f6SDimitry Andric       dbgs() << "inputs: " << Inputs.size() << "\n";
1123044eb2f6SDimitry Andric       dbgs() << "outputs: " << Outputs.size() << "\n";
1124044eb2f6SDimitry Andric       for (Value *value : Inputs)
1125044eb2f6SDimitry Andric         dbgs() << "value used in func: " << *value << "\n";
1126044eb2f6SDimitry Andric       for (Value *output : Outputs)
1127044eb2f6SDimitry Andric         dbgs() << "instr used in func: " << *output << "\n";
1128b60736ecSDimitry Andric     });
1129b60736ecSDimitry Andric 
1130044eb2f6SDimitry Andric     // Do not extract regions that have live exit variables.
1131044eb2f6SDimitry Andric     if (Outputs.size() > 0 && !ForceLiveExit)
1132044eb2f6SDimitry Andric       continue;
1133044eb2f6SDimitry Andric 
1134b60736ecSDimitry Andric     if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1135b60736ecSDimitry Andric       CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1136cfca06d7SDimitry Andric       BasicBlock *OutliningCallBB = OCS->getParent();
1137044eb2f6SDimitry Andric       assert(OutliningCallBB->getParent() == ClonedFunc);
1138044eb2f6SDimitry Andric       OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1139044eb2f6SDimitry Andric       NumColdRegionsOutlined++;
1140044eb2f6SDimitry Andric       OutlinedRegionCost += CurrentOutlinedRegionCost;
1141044eb2f6SDimitry Andric 
1142044eb2f6SDimitry Andric       if (MarkOutlinedColdCC) {
1143044eb2f6SDimitry Andric         OutlinedFunc->setCallingConv(CallingConv::Cold);
1144cfca06d7SDimitry Andric         OCS->setCallingConv(CallingConv::Cold);
1145044eb2f6SDimitry Andric       }
1146044eb2f6SDimitry Andric     } else
1147044eb2f6SDimitry Andric       ORE.emit([&]() {
1148044eb2f6SDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1149044eb2f6SDimitry Andric                                         &RegionInfo.Region.front()->front())
1150044eb2f6SDimitry Andric                << "Failed to extract region at block "
1151044eb2f6SDimitry Andric                << ore::NV("Block", RegionInfo.Region.front());
1152044eb2f6SDimitry Andric       });
1153044eb2f6SDimitry Andric   }
1154044eb2f6SDimitry Andric 
1155044eb2f6SDimitry Andric   return !OutlinedFunctions.empty();
1156044eb2f6SDimitry Andric }
1157044eb2f6SDimitry Andric 
1158044eb2f6SDimitry Andric Function *
doSingleRegionFunctionOutlining()1159044eb2f6SDimitry Andric PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
11607c7aba6eSDimitry Andric   // Returns true if the block is to be partial inlined into the caller
11617c7aba6eSDimitry Andric   // (i.e. not to be extracted to the out of line function)
11627c7aba6eSDimitry Andric   auto ToBeInlined = [&, this](BasicBlock *BB) {
11637c7aba6eSDimitry Andric     return BB == ClonedOI->ReturnBlock ||
1164b60736ecSDimitry Andric            llvm::is_contained(ClonedOI->Entries, BB);
11657c7aba6eSDimitry Andric   };
11667c7aba6eSDimitry Andric 
1167044eb2f6SDimitry Andric   assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1168044eb2f6SDimitry Andric   // The CodeExtractor needs a dominator tree.
1169044eb2f6SDimitry Andric   DominatorTree DT;
1170044eb2f6SDimitry Andric   DT.recalculate(*ClonedFunc);
1171044eb2f6SDimitry Andric 
1172044eb2f6SDimitry Andric   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1173044eb2f6SDimitry Andric   LoopInfo LI(DT);
1174044eb2f6SDimitry Andric   BranchProbabilityInfo BPI(*ClonedFunc, LI);
1175044eb2f6SDimitry Andric   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1176044eb2f6SDimitry Andric 
11777c7aba6eSDimitry Andric   // Gather up the blocks that we're going to extract.
11787c7aba6eSDimitry Andric   std::vector<BasicBlock *> ToExtract;
1179b60736ecSDimitry Andric   auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
11807c7aba6eSDimitry Andric   ToExtract.push_back(ClonedOI->NonReturnBlock);
1181b60736ecSDimitry Andric   OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182b60736ecSDimitry Andric       ClonedOI->NonReturnBlock, ClonedFuncTTI);
11837fa27ce4SDimitry Andric   for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
11847fa27ce4SDimitry Andric     if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
11857fa27ce4SDimitry Andric       ToExtract.push_back(BB);
11867c7aba6eSDimitry Andric       // FIXME: the code extractor may hoist/sink more code
11877c7aba6eSDimitry Andric       // into the outlined function which may make the outlining
11887c7aba6eSDimitry Andric       // overhead (the difference of the outlined function cost
11897c7aba6eSDimitry Andric       // and OutliningRegionCost) look larger.
11907fa27ce4SDimitry Andric       OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
11917c7aba6eSDimitry Andric     }
11927c7aba6eSDimitry Andric 
11937c7aba6eSDimitry Andric   // Extract the body of the if.
11941d5ae102SDimitry Andric   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1195044eb2f6SDimitry Andric   Function *OutlinedFunc =
1196044eb2f6SDimitry Andric       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1197e6d15924SDimitry Andric                     ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1198044eb2f6SDimitry Andric                     /* AllowVarargs */ true)
11991d5ae102SDimitry Andric           .extractCodeRegion(CEAC);
12007c7aba6eSDimitry Andric 
12017c7aba6eSDimitry Andric   if (OutlinedFunc) {
1202044eb2f6SDimitry Andric     BasicBlock *OutliningCallBB =
1203b60736ecSDimitry Andric         PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
12047c7aba6eSDimitry Andric     assert(OutliningCallBB->getParent() == ClonedFunc);
1205044eb2f6SDimitry Andric     OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1206044eb2f6SDimitry Andric   } else
1207044eb2f6SDimitry Andric     ORE.emit([&]() {
1208044eb2f6SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1209044eb2f6SDimitry Andric                                       &ToExtract.front()->front())
1210044eb2f6SDimitry Andric              << "Failed to extract region at block "
1211044eb2f6SDimitry Andric              << ore::NV("Block", ToExtract.front());
1212044eb2f6SDimitry Andric     });
12137c7aba6eSDimitry Andric 
12147c7aba6eSDimitry Andric   return OutlinedFunc;
12157c7aba6eSDimitry Andric }
12167c7aba6eSDimitry Andric 
~FunctionCloner()12177c7aba6eSDimitry Andric PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
12187c7aba6eSDimitry Andric   // Ditch the duplicate, since we're done with it, and rewrite all remaining
12197c7aba6eSDimitry Andric   // users (function pointers, etc.) back to the original function.
12207c7aba6eSDimitry Andric   ClonedFunc->replaceAllUsesWith(OrigFunc);
12217c7aba6eSDimitry Andric   ClonedFunc->eraseFromParent();
12227c7aba6eSDimitry Andric   if (!IsFunctionInlined) {
1223044eb2f6SDimitry Andric     // Remove each function that was speculatively created if there is no
12247c7aba6eSDimitry Andric     // reference.
1225044eb2f6SDimitry Andric     for (auto FuncBBPair : OutlinedFunctions) {
1226044eb2f6SDimitry Andric       Function *Func = FuncBBPair.first;
1227044eb2f6SDimitry Andric       Func->eraseFromParent();
1228044eb2f6SDimitry Andric     }
12297c7aba6eSDimitry Andric   }
12307c7aba6eSDimitry Andric }
12317c7aba6eSDimitry Andric 
unswitchFunction(Function & F)1232b60736ecSDimitry Andric std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1233b60736ecSDimitry Andric   if (F.hasAddressTaken())
1234044eb2f6SDimitry Andric     return {false, nullptr};
1235a303c417SDimitry Andric 
1236148779dfSDimitry Andric   // Let inliner handle it
1237b60736ecSDimitry Andric   if (F.hasFnAttribute(Attribute::AlwaysInline))
1238044eb2f6SDimitry Andric     return {false, nullptr};
1239148779dfSDimitry Andric 
1240b60736ecSDimitry Andric   if (F.hasFnAttribute(Attribute::NoInline))
1241044eb2f6SDimitry Andric     return {false, nullptr};
1242148779dfSDimitry Andric 
1243b60736ecSDimitry Andric   if (PSI.isFunctionEntryCold(&F))
1244044eb2f6SDimitry Andric     return {false, nullptr};
1245148779dfSDimitry Andric 
1246b60736ecSDimitry Andric   if (F.users().empty())
1247044eb2f6SDimitry Andric     return {false, nullptr};
1248a303c417SDimitry Andric 
1249b60736ecSDimitry Andric   OptimizationRemarkEmitter ORE(&F);
1250044eb2f6SDimitry Andric 
1251044eb2f6SDimitry Andric   // Only try to outline cold regions if we have a profile summary, which
1252044eb2f6SDimitry Andric   // implies we have profiling information.
1253b60736ecSDimitry Andric   if (PSI.hasProfileSummary() && F.hasProfileData() &&
1254044eb2f6SDimitry Andric       !DisableMultiRegionPartialInline) {
1255044eb2f6SDimitry Andric     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1256eb11fae6SDimitry Andric         computeOutliningColdRegionsInfo(F, ORE);
1257044eb2f6SDimitry Andric     if (OMRI) {
1258b60736ecSDimitry Andric       FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1259044eb2f6SDimitry Andric 
1260b60736ecSDimitry Andric       LLVM_DEBUG({
1261cfca06d7SDimitry Andric         dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
1262cfca06d7SDimitry Andric         dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
1263044eb2f6SDimitry Andric                << "\n";
1264b60736ecSDimitry Andric       });
1265b60736ecSDimitry Andric 
1266044eb2f6SDimitry Andric       bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1267044eb2f6SDimitry Andric 
1268044eb2f6SDimitry Andric       if (DidOutline) {
1269b60736ecSDimitry Andric         LLVM_DEBUG({
1270044eb2f6SDimitry Andric           dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1271044eb2f6SDimitry Andric           Cloner.ClonedFunc->print(dbgs());
1272044eb2f6SDimitry Andric           dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1273b60736ecSDimitry Andric         });
1274044eb2f6SDimitry Andric 
1275044eb2f6SDimitry Andric         if (tryPartialInline(Cloner))
1276044eb2f6SDimitry Andric           return {true, nullptr};
1277044eb2f6SDimitry Andric       }
1278044eb2f6SDimitry Andric     }
1279044eb2f6SDimitry Andric   }
1280044eb2f6SDimitry Andric 
1281044eb2f6SDimitry Andric   // Fall-thru to regular partial inlining if we:
1282044eb2f6SDimitry Andric   //    i) can't find any cold regions to outline, or
1283044eb2f6SDimitry Andric   //   ii) can't inline the outlined function anywhere.
12846b3f41edSDimitry Andric   std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
12856b3f41edSDimitry Andric   if (!OI)
1286044eb2f6SDimitry Andric     return {false, nullptr};
1287600c6fa1SEd Schouten 
1288b60736ecSDimitry Andric   FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289b60736ecSDimitry Andric   Cloner.normalizeReturnBlock();
1290044eb2f6SDimitry Andric 
1291044eb2f6SDimitry Andric   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1292044eb2f6SDimitry Andric 
1293044eb2f6SDimitry Andric   if (!OutlinedFunction)
1294044eb2f6SDimitry Andric     return {false, nullptr};
1295600c6fa1SEd Schouten 
1296b60736ecSDimitry Andric   if (tryPartialInline(Cloner))
1297044eb2f6SDimitry Andric     return {true, OutlinedFunction};
1298b2f21fb0SEd Schouten 
1299044eb2f6SDimitry Andric   return {false, nullptr};
13006b3f41edSDimitry Andric }
13016b3f41edSDimitry Andric 
tryPartialInline(FunctionCloner & Cloner)13027c7aba6eSDimitry Andric bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1303044eb2f6SDimitry Andric   if (Cloner.OutlinedFunctions.empty())
13047c7aba6eSDimitry Andric     return false;
13056b3f41edSDimitry Andric 
1306344a3780SDimitry Andric   auto OutliningCosts = computeOutliningCosts(Cloner);
1307344a3780SDimitry Andric 
1308e3b55780SDimitry Andric   InstructionCost SizeCost = std::get<0>(OutliningCosts);
1309e3b55780SDimitry Andric   InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1310e3b55780SDimitry Andric 
1311e3b55780SDimitry Andric   assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1312e3b55780SDimitry Andric          "Expected valid costs");
13137c7aba6eSDimitry Andric 
1314044eb2f6SDimitry Andric   // Only calculate RelativeToEntryFreq when we are doing single region
1315044eb2f6SDimitry Andric   // outlining.
1316044eb2f6SDimitry Andric   BranchProbability RelativeToEntryFreq;
1317b60736ecSDimitry Andric   if (Cloner.ClonedOI)
1318044eb2f6SDimitry Andric     RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1319b60736ecSDimitry Andric   else
1320044eb2f6SDimitry Andric     // RelativeToEntryFreq doesn't make sense when we have more than one
1321044eb2f6SDimitry Andric     // outlined call because each call will have a different relative frequency
1322044eb2f6SDimitry Andric     // to the entry block.  We can consider using the average, but the
1323044eb2f6SDimitry Andric     // usefulness of that information is questionable. For now, assume we never
1324044eb2f6SDimitry Andric     // execute the calls to outlined functions.
1325044eb2f6SDimitry Andric     RelativeToEntryFreq = BranchProbability(0, 1);
13266b3f41edSDimitry Andric 
1327e3b55780SDimitry Andric   BlockFrequency WeightedRcost =
1328e3b55780SDimitry Andric       BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
1329044eb2f6SDimitry Andric 
1330044eb2f6SDimitry Andric   // The call sequence(s) to the outlined function(s) are larger than the sum of
1331044eb2f6SDimitry Andric   // the original outlined region size(s), it does not increase the chances of
1332044eb2f6SDimitry Andric   // inlining the function with outlining (The inliner uses the size increase to
13337c7aba6eSDimitry Andric   // model the cost of inlining a callee).
13347c7aba6eSDimitry Andric   if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
1335eb11fae6SDimitry Andric     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
13366b3f41edSDimitry Andric     DebugLoc DLoc;
13376b3f41edSDimitry Andric     BasicBlock *Block;
1338b60736ecSDimitry Andric     std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1339eb11fae6SDimitry Andric     OrigFuncORE.emit([&]() {
1340044eb2f6SDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
13416b3f41edSDimitry Andric                                         DLoc, Block)
13427c7aba6eSDimitry Andric              << ore::NV("Function", Cloner.OrigFunc)
13436b3f41edSDimitry Andric              << " not partially inlined into callers (Original Size = "
13447c7aba6eSDimitry Andric              << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
13456b3f41edSDimitry Andric              << ", Size of call sequence to outlined function = "
1346044eb2f6SDimitry Andric              << ore::NV("NewSize", SizeCost) << ")";
1347044eb2f6SDimitry Andric     });
13486b3f41edSDimitry Andric     return false;
13496b3f41edSDimitry Andric   }
13506b3f41edSDimitry Andric 
13511d5ae102SDimitry Andric   assert(Cloner.OrigFunc->users().empty() &&
13526b3f41edSDimitry Andric          "F's users should all be replaced!");
13537c7aba6eSDimitry Andric 
13547c7aba6eSDimitry Andric   std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
13557c7aba6eSDimitry Andric                             Cloner.ClonedFunc->user_end());
13566b3f41edSDimitry Andric 
13576b3f41edSDimitry Andric   DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1358c7dac04cSDimitry Andric   auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1359c7dac04cSDimitry Andric   if (CalleeEntryCount)
13607c7aba6eSDimitry Andric     computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
13616b3f41edSDimitry Andric 
1362eb11fae6SDimitry Andric   uint64_t CalleeEntryCountV =
1363c0981da4SDimitry Andric       (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
13647c7aba6eSDimitry Andric 
13656b3f41edSDimitry Andric   bool AnyInline = false;
13666b3f41edSDimitry Andric   for (User *User : Users) {
13676f8fc217SDimitry Andric     // Don't bother with BlockAddress used by CallBr for asm goto.
13686f8fc217SDimitry Andric     if (isa<BlockAddress>(User))
13696f8fc217SDimitry Andric       continue;
13706f8fc217SDimitry Andric 
1371cfca06d7SDimitry Andric     CallBase *CB = getSupportedCallBase(User);
13726b3f41edSDimitry Andric 
1373b60736ecSDimitry Andric     if (isLimitReached())
13746b3f41edSDimitry Andric       continue;
13756b3f41edSDimitry Andric 
1376cfca06d7SDimitry Andric     OptimizationRemarkEmitter CallerORE(CB->getCaller());
1377cfca06d7SDimitry Andric     if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
13786b3f41edSDimitry Andric       continue;
13796b3f41edSDimitry Andric 
1380044eb2f6SDimitry Andric     // Construct remark before doing the inlining, as after successful inlining
1381044eb2f6SDimitry Andric     // the callsite is removed.
1382cfca06d7SDimitry Andric     OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
1383044eb2f6SDimitry Andric     OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1384cfca06d7SDimitry Andric        << ore::NV("Caller", CB->getCaller());
13856b3f41edSDimitry Andric 
13867fa27ce4SDimitry Andric     InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
1387044eb2f6SDimitry Andric     // We can only forward varargs when we outlined a single region, else we
1388044eb2f6SDimitry Andric     // bail on vararg functions.
1389e3b55780SDimitry Andric     if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
1390044eb2f6SDimitry Andric                         (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1391cfca06d7SDimitry Andric                                          : nullptr))
1392cfca06d7SDimitry Andric              .isSuccess())
1393044eb2f6SDimitry Andric       continue;
1394044eb2f6SDimitry Andric 
1395eb11fae6SDimitry Andric     CallerORE.emit(OR);
13966b3f41edSDimitry Andric 
13976b3f41edSDimitry Andric     // Now update the entry count:
13986b3f41edSDimitry Andric     if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
13996b3f41edSDimitry Andric       uint64_t CallSiteCount = CallSiteToProfCountMap[User];
14006b3f41edSDimitry Andric       CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
14016b3f41edSDimitry Andric     }
14026b3f41edSDimitry Andric 
14036b3f41edSDimitry Andric     AnyInline = true;
14046b3f41edSDimitry Andric     NumPartialInlining++;
14056b3f41edSDimitry Andric     // Update the stats
1406044eb2f6SDimitry Andric     if (Cloner.ClonedOI)
14076b3f41edSDimitry Andric       NumPartialInlined++;
1408044eb2f6SDimitry Andric     else
1409044eb2f6SDimitry Andric       NumColdOutlinePartialInlined++;
14106b3f41edSDimitry Andric   }
14116b3f41edSDimitry Andric 
14127c7aba6eSDimitry Andric   if (AnyInline) {
14137c7aba6eSDimitry Andric     Cloner.IsFunctionInlined = true;
14147c7aba6eSDimitry Andric     if (CalleeEntryCount)
1415c0981da4SDimitry Andric       Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1416c0981da4SDimitry Andric           CalleeEntryCountV, CalleeEntryCount->getType()));
1417eb11fae6SDimitry Andric     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1418eb11fae6SDimitry Andric     OrigFuncORE.emit([&]() {
1419044eb2f6SDimitry Andric       return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1420044eb2f6SDimitry Andric              << "Partially inlined into at least one caller";
1421044eb2f6SDimitry Andric     });
14227c7aba6eSDimitry Andric   }
14236b3f41edSDimitry Andric 
14246b3f41edSDimitry Andric   return AnyInline;
1425600c6fa1SEd Schouten }
1426600c6fa1SEd Schouten 
run(Module & M)1427b915e9e0SDimitry Andric bool PartialInlinerImpl::run(Module &M) {
142812f3ca4cSDimitry Andric   if (DisablePartialInlining)
142912f3ca4cSDimitry Andric     return false;
143012f3ca4cSDimitry Andric 
1431b915e9e0SDimitry Andric   std::vector<Function *> Worklist;
1432b915e9e0SDimitry Andric   Worklist.reserve(M.size());
143301095a5dSDimitry Andric   for (Function &F : M)
143401095a5dSDimitry Andric     if (!F.use_empty() && !F.isDeclaration())
1435b915e9e0SDimitry Andric       Worklist.push_back(&F);
1436600c6fa1SEd Schouten 
1437b915e9e0SDimitry Andric   bool Changed = false;
1438b915e9e0SDimitry Andric   while (!Worklist.empty()) {
1439b915e9e0SDimitry Andric     Function *CurrFunc = Worklist.back();
1440b915e9e0SDimitry Andric     Worklist.pop_back();
1441600c6fa1SEd Schouten 
1442b915e9e0SDimitry Andric     if (CurrFunc->use_empty())
1443b915e9e0SDimitry Andric       continue;
1444600c6fa1SEd Schouten 
1445b60736ecSDimitry Andric     std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
1446044eb2f6SDimitry Andric     if (Result.second)
1447044eb2f6SDimitry Andric       Worklist.push_back(Result.second);
1448d8e91e46SDimitry Andric     Changed |= Result.first;
1449600c6fa1SEd Schouten   }
1450600c6fa1SEd Schouten 
1451b915e9e0SDimitry Andric   return Changed;
1452600c6fa1SEd Schouten }
1453600c6fa1SEd Schouten 
run(Module & M,ModuleAnalysisManager & AM)1454b915e9e0SDimitry Andric PreservedAnalyses PartialInlinerPass::run(Module &M,
1455b915e9e0SDimitry Andric                                           ModuleAnalysisManager &AM) {
1456b915e9e0SDimitry Andric   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1457a303c417SDimitry Andric 
1458cfca06d7SDimitry Andric   auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1459b915e9e0SDimitry Andric     return FAM.getResult<AssumptionAnalysis>(F);
1460b915e9e0SDimitry Andric   };
1461a303c417SDimitry Andric 
1462e6d15924SDimitry Andric   auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1463e6d15924SDimitry Andric     return FAM.getCachedResult<AssumptionAnalysis>(F);
1464e6d15924SDimitry Andric   };
1465e6d15924SDimitry Andric 
1466cfca06d7SDimitry Andric   auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1467a303c417SDimitry Andric     return FAM.getResult<BlockFrequencyAnalysis>(F);
1468a303c417SDimitry Andric   };
1469a303c417SDimitry Andric 
1470cfca06d7SDimitry Andric   auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1471a303c417SDimitry Andric     return FAM.getResult<TargetIRAnalysis>(F);
1472a303c417SDimitry Andric   };
1473a303c417SDimitry Andric 
1474cfca06d7SDimitry Andric   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1475cfca06d7SDimitry Andric     return FAM.getResult<TargetLibraryAnalysis>(F);
1476cfca06d7SDimitry Andric   };
1477a303c417SDimitry Andric 
1478cfca06d7SDimitry Andric   ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1479cfca06d7SDimitry Andric 
1480cfca06d7SDimitry Andric   if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1481cfca06d7SDimitry Andric                          GetTLI, PSI, GetBFI)
1482044eb2f6SDimitry Andric           .run(M))
148301095a5dSDimitry Andric     return PreservedAnalyses::none();
148401095a5dSDimitry Andric   return PreservedAnalyses::all();
1485600c6fa1SEd Schouten }
1486