//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// Uses profile information to split out cold blocks.
//
// This pass splits out cold machine basic blocks from the parent function. This
// implementation leverages the basic block section framework. Blocks marked
// cold by this pass are grouped together in a separate section prefixed with
// ".text.unlikely.*". The linker can then group these together as a cold
// section. The split part of the function is a contiguous region identified by
// the symbol "foo.cold". Grouping all cold blocks across functions together
// decreases fragmentation and improves icache and itlb utilization. Note that
// the overall changes to the binary size are negligible; only a small number of
// additional jump instructions may be introduced.
//
// For the original RFC of this pass please see
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
//===----------------------------------------------------------------------===//
25b60736ecSDimitry Andric
26344a3780SDimitry Andric #include "llvm/ADT/SmallVector.h"
277fa27ce4SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
287fa27ce4SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
297fa27ce4SDimitry Andric #include "llvm/Analysis/EHUtils.h"
30b60736ecSDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
31b60736ecSDimitry Andric #include "llvm/CodeGen/BasicBlockSectionUtils.h"
32b60736ecSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
33b60736ecSDimitry Andric #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
34b60736ecSDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
35b60736ecSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
36b60736ecSDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
37b60736ecSDimitry Andric #include "llvm/CodeGen/Passes.h"
38b1c73532SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
39b60736ecSDimitry Andric #include "llvm/IR/Function.h"
40b60736ecSDimitry Andric #include "llvm/InitializePasses.h"
41b60736ecSDimitry Andric #include "llvm/Support/CommandLine.h"
42e3b55780SDimitry Andric #include <optional>
43b60736ecSDimitry Andric
44b60736ecSDimitry Andric using namespace llvm;
45b60736ecSDimitry Andric
46b60736ecSDimitry Andric // FIXME: This cutoff value is CPU dependent and should be moved to
47b60736ecSDimitry Andric // TargetTransformInfo once we consider enabling this on other platforms.
48b60736ecSDimitry Andric // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49b60736ecSDimitry Andric // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50b60736ecSDimitry Andric // The default was empirically determined to be optimal when considering cutoff
51b60736ecSDimitry Andric // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
52b60736ecSDimitry Andric // Intel CPUs.
53b60736ecSDimitry Andric static cl::opt<unsigned>
54b60736ecSDimitry Andric PercentileCutoff("mfs-psi-cutoff",
55b60736ecSDimitry Andric cl::desc("Percentile profile summary cutoff used to "
56b60736ecSDimitry Andric "determine cold blocks. Unused if set to zero."),
57b60736ecSDimitry Andric cl::init(999950), cl::Hidden);
58b60736ecSDimitry Andric
59b60736ecSDimitry Andric static cl::opt<unsigned> ColdCountThreshold(
60b60736ecSDimitry Andric "mfs-count-threshold",
61b60736ecSDimitry Andric cl::desc(
62b60736ecSDimitry Andric "Minimum number of times a block must be executed to be retained."),
63b60736ecSDimitry Andric cl::init(1), cl::Hidden);
64b60736ecSDimitry Andric
65e3b55780SDimitry Andric static cl::opt<bool> SplitAllEHCode(
66e3b55780SDimitry Andric "mfs-split-ehcode",
67e3b55780SDimitry Andric cl::desc("Splits all EH code and it's descendants by default."),
68e3b55780SDimitry Andric cl::init(false), cl::Hidden);
69e3b55780SDimitry Andric
70b60736ecSDimitry Andric namespace {
71b60736ecSDimitry Andric
72b60736ecSDimitry Andric class MachineFunctionSplitter : public MachineFunctionPass {
73b60736ecSDimitry Andric public:
74b60736ecSDimitry Andric static char ID;
MachineFunctionSplitter()75b60736ecSDimitry Andric MachineFunctionSplitter() : MachineFunctionPass(ID) {
76b60736ecSDimitry Andric initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
77b60736ecSDimitry Andric }
78b60736ecSDimitry Andric
getPassName() const79b60736ecSDimitry Andric StringRef getPassName() const override {
80b60736ecSDimitry Andric return "Machine Function Splitter Transformation";
81b60736ecSDimitry Andric }
82b60736ecSDimitry Andric
83b60736ecSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
84b60736ecSDimitry Andric
85b60736ecSDimitry Andric bool runOnMachineFunction(MachineFunction &F) override;
86b60736ecSDimitry Andric };
87b60736ecSDimitry Andric } // end anonymous namespace
88b60736ecSDimitry Andric
89e3b55780SDimitry Andric /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
907fa27ce4SDimitry Andric /// only by EH pad as cold. This will help mark EH pads statically cold
917fa27ce4SDimitry Andric /// instead of relying on profile data.
setDescendantEHBlocksCold(MachineFunction & MF)927fa27ce4SDimitry Andric static void setDescendantEHBlocksCold(MachineFunction &MF) {
937fa27ce4SDimitry Andric DenseSet<MachineBasicBlock *> EHBlocks;
947fa27ce4SDimitry Andric computeEHOnlyBlocks(MF, EHBlocks);
957fa27ce4SDimitry Andric for (auto Block : EHBlocks) {
967fa27ce4SDimitry Andric Block->setSectionID(MBBSectionID::ColdSectionID);
977fa27ce4SDimitry Andric }
987fa27ce4SDimitry Andric }
99e3b55780SDimitry Andric
finishAdjustingBasicBlocksAndLandingPads(MachineFunction & MF)1007fa27ce4SDimitry Andric static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
1017fa27ce4SDimitry Andric auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
1027fa27ce4SDimitry Andric return X.getSectionID().Type < Y.getSectionID().Type;
103e3b55780SDimitry Andric };
1047fa27ce4SDimitry Andric llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
1057fa27ce4SDimitry Andric llvm::avoidZeroOffsetLandingPad(MF);
106e3b55780SDimitry Andric }
107e3b55780SDimitry Andric
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)108344a3780SDimitry Andric static bool isColdBlock(const MachineBasicBlock &MBB,
109b60736ecSDimitry Andric const MachineBlockFrequencyInfo *MBFI,
110b60736ecSDimitry Andric ProfileSummaryInfo *PSI) {
111e3b55780SDimitry Andric std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
1127fa27ce4SDimitry Andric // For instrumentation profiles and sample profiles, we use different ways
1137fa27ce4SDimitry Andric // to judge whether a block is cold and should be split.
1147fa27ce4SDimitry Andric if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
1157fa27ce4SDimitry Andric // If using instrument profile, which is deemed "accurate", no count means
1167fa27ce4SDimitry Andric // cold.
117145449b1SDimitry Andric if (!Count)
118b60736ecSDimitry Andric return true;
1197fa27ce4SDimitry Andric if (PercentileCutoff > 0)
120b60736ecSDimitry Andric return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
1217fa27ce4SDimitry Andric // Fallthrough to end of function.
1227fa27ce4SDimitry Andric } else if (PSI->hasSampleProfile()) {
1237fa27ce4SDimitry Andric // For sample profile, no count means "do not judege coldness".
1247fa27ce4SDimitry Andric if (!Count)
1257fa27ce4SDimitry Andric return false;
126b60736ecSDimitry Andric }
1277fa27ce4SDimitry Andric
128b60736ecSDimitry Andric return (*Count < ColdCountThreshold);
129b60736ecSDimitry Andric }
130b60736ecSDimitry Andric
runOnMachineFunction(MachineFunction & MF)131b60736ecSDimitry Andric bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
132e3b55780SDimitry Andric // We target functions with profile data. Static information in the form
133e3b55780SDimitry Andric // of exception handling code may be split to cold if user passes the
134e3b55780SDimitry Andric // mfs-split-ehcode flag.
135e3b55780SDimitry Andric bool UseProfileData = MF.getFunction().hasProfileData();
136e3b55780SDimitry Andric if (!UseProfileData && !SplitAllEHCode)
137b60736ecSDimitry Andric return false;
138b60736ecSDimitry Andric
139b1c73532SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
140b1c73532SDimitry Andric if (!TII.isFunctionSafeToSplit(MF))
141b60736ecSDimitry Andric return false;
142b60736ecSDimitry Andric
143b60736ecSDimitry Andric // Renumbering blocks here preserves the order of the blocks as
144b60736ecSDimitry Andric // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
145b60736ecSDimitry Andric // blocks. Preserving the order of blocks is essential to retaining decisions
146b60736ecSDimitry Andric // made by prior passes such as MachineBlockPlacement.
147b60736ecSDimitry Andric MF.RenumberBlocks();
148b60736ecSDimitry Andric MF.setBBSectionsType(BasicBlockSection::Preset);
149e3b55780SDimitry Andric
150e3b55780SDimitry Andric MachineBlockFrequencyInfo *MBFI = nullptr;
151e3b55780SDimitry Andric ProfileSummaryInfo *PSI = nullptr;
152e3b55780SDimitry Andric if (UseProfileData) {
153ac9a064cSDimitry Andric MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
154e3b55780SDimitry Andric PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1557fa27ce4SDimitry Andric // If we don't have a good profile (sample profile is not deemed
1567fa27ce4SDimitry Andric // as a "good profile") and the function is not hot, then early
1577fa27ce4SDimitry Andric // return. (Because we can only trust hot functions when profile
1587fa27ce4SDimitry Andric // quality is not good.)
1597fa27ce4SDimitry Andric if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
1607fa27ce4SDimitry Andric // Split all EH code and it's descendant statically by default.
1617fa27ce4SDimitry Andric if (SplitAllEHCode)
1627fa27ce4SDimitry Andric setDescendantEHBlocksCold(MF);
1637fa27ce4SDimitry Andric finishAdjustingBasicBlocksAndLandingPads(MF);
1647fa27ce4SDimitry Andric return true;
1657fa27ce4SDimitry Andric }
166e3b55780SDimitry Andric }
167b60736ecSDimitry Andric
168344a3780SDimitry Andric SmallVector<MachineBasicBlock *, 2> LandingPads;
169b60736ecSDimitry Andric for (auto &MBB : MF) {
170344a3780SDimitry Andric if (MBB.isEntryBlock())
171b60736ecSDimitry Andric continue;
172344a3780SDimitry Andric
173344a3780SDimitry Andric if (MBB.isEHPad())
174344a3780SDimitry Andric LandingPads.push_back(&MBB);
175ac9a064cSDimitry Andric else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) &&
176ac9a064cSDimitry Andric TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode)
177b60736ecSDimitry Andric MBB.setSectionID(MBBSectionID::ColdSectionID);
178b60736ecSDimitry Andric }
179b60736ecSDimitry Andric
180e3b55780SDimitry Andric // Split all EH code and it's descendant statically by default.
181e3b55780SDimitry Andric if (SplitAllEHCode)
1827fa27ce4SDimitry Andric setDescendantEHBlocksCold(MF);
183344a3780SDimitry Andric // We only split out eh pads if all of them are cold.
184e3b55780SDimitry Andric else {
1857fa27ce4SDimitry Andric // Here we have UseProfileData == true.
186344a3780SDimitry Andric bool HasHotLandingPads = false;
187344a3780SDimitry Andric for (const MachineBasicBlock *LP : LandingPads) {
188ac9a064cSDimitry Andric if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP))
189344a3780SDimitry Andric HasHotLandingPads = true;
190344a3780SDimitry Andric }
191344a3780SDimitry Andric if (!HasHotLandingPads) {
192344a3780SDimitry Andric for (MachineBasicBlock *LP : LandingPads)
193344a3780SDimitry Andric LP->setSectionID(MBBSectionID::ColdSectionID);
194344a3780SDimitry Andric }
195e3b55780SDimitry Andric }
1967fa27ce4SDimitry Andric
1977fa27ce4SDimitry Andric finishAdjustingBasicBlocksAndLandingPads(MF);
198b60736ecSDimitry Andric return true;
199b60736ecSDimitry Andric }
200b60736ecSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const201b60736ecSDimitry Andric void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
202b60736ecSDimitry Andric AU.addRequired<MachineModuleInfoWrapperPass>();
203ac9a064cSDimitry Andric AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
204b60736ecSDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
205b60736ecSDimitry Andric }
206b60736ecSDimitry Andric
207b60736ecSDimitry Andric char MachineFunctionSplitter::ID = 0;
208b60736ecSDimitry Andric INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
209b60736ecSDimitry Andric "Split machine functions using profile information", false,
210b60736ecSDimitry Andric false)
211b60736ecSDimitry Andric
createMachineFunctionSplitterPass()212b60736ecSDimitry Andric MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
213b60736ecSDimitry Andric return new MachineFunctionSplitter();
214b60736ecSDimitry Andric }
215