177fc4c14SDimitry Andric //===- CodeLayout.cpp - Implementation of code layout algorithms ----------===//
277fc4c14SDimitry Andric //
377fc4c14SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
477fc4c14SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
577fc4c14SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
677fc4c14SDimitry Andric //
777fc4c14SDimitry Andric //===----------------------------------------------------------------------===//
877fc4c14SDimitry Andric //
97fa27ce4SDimitry Andric // The file implements "cache-aware" layout algorithms of basic blocks and
107fa27ce4SDimitry Andric // functions in a binary.
1177fc4c14SDimitry Andric //
1277fc4c14SDimitry Andric // The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
1377fc4c14SDimitry Andric // optimizing jump locality and thus processor I-cache utilization. This is
1477fc4c14SDimitry Andric // achieved via increasing the number of fall-through jumps and co-locating
1577fc4c14SDimitry Andric // frequently executed nodes together. The name follows the underlying
1677fc4c14SDimitry Andric // optimization problem, Extended-TSP, which is a generalization of classical
1777fc4c14SDimitry Andric // (maximum) Traveling Salesmen Problem.
1877fc4c14SDimitry Andric //
1977fc4c14SDimitry Andric // The algorithm is a greedy heuristic that works with chains (ordered lists)
2077fc4c14SDimitry Andric // of basic blocks. Initially all chains are isolated basic blocks. On every
2177fc4c14SDimitry Andric // iteration, we pick a pair of chains whose merging yields the biggest increase
2277fc4c14SDimitry Andric // in the ExtTSP score, which models how i-cache "friendly" a specific chain is.
2377fc4c14SDimitry Andric // A pair of chains giving the maximum gain is merged into a new chain. The
2477fc4c14SDimitry Andric // procedure stops when there is only one chain left, or when merging does not
2577fc4c14SDimitry Andric // increase ExtTSP. In the latter case, the remaining chains are sorted by
2677fc4c14SDimitry Andric // density in the decreasing order.
2777fc4c14SDimitry Andric //
2877fc4c14SDimitry Andric // An important aspect is the way two chains are merged. Unlike earlier
2977fc4c14SDimitry Andric // algorithms (e.g., based on the approach of Pettis-Hansen), two
3077fc4c14SDimitry Andric // chains, X and Y, are first split into three, X1, X2, and Y. Then we
3177fc4c14SDimitry Andric // consider all possible ways of gluing the three chains (e.g., X1YX2, X1X2Y,
3277fc4c14SDimitry Andric // X2X1Y, X2YX1, YX1X2, YX2X1) and choose the one producing the largest score.
3377fc4c14SDimitry Andric // This improves the quality of the final result (the search space is larger)
3477fc4c14SDimitry Andric // while keeping the implementation sufficiently fast.
3577fc4c14SDimitry Andric //
3677fc4c14SDimitry Andric // Reference:
3777fc4c14SDimitry Andric // * A. Newell and S. Pupyrev, Improved Basic Block Reordering,
3877fc4c14SDimitry Andric // IEEE Transactions on Computers, 2020
39e3b55780SDimitry Andric // https://arxiv.org/abs/1809.04676
4077fc4c14SDimitry Andric //
4177fc4c14SDimitry Andric //===----------------------------------------------------------------------===//
4277fc4c14SDimitry Andric
4377fc4c14SDimitry Andric #include "llvm/Transforms/Utils/CodeLayout.h"
4477fc4c14SDimitry Andric #include "llvm/Support/CommandLine.h"
457fa27ce4SDimitry Andric #include "llvm/Support/Debug.h"
4677fc4c14SDimitry Andric
47e3b55780SDimitry Andric #include <cmath>
48b1c73532SDimitry Andric #include <set>
49e3b55780SDimitry Andric
5077fc4c14SDimitry Andric using namespace llvm;
51b1c73532SDimitry Andric using namespace llvm::codelayout;
52b1c73532SDimitry Andric
5377fc4c14SDimitry Andric #define DEBUG_TYPE "code-layout"
5477fc4c14SDimitry Andric
557fa27ce4SDimitry Andric namespace llvm {
56145449b1SDimitry Andric cl::opt<bool> EnableExtTspBlockPlacement(
57145449b1SDimitry Andric "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
58145449b1SDimitry Andric cl::desc("Enable machine block placement based on the ext-tsp model, "
59145449b1SDimitry Andric "optimizing I-cache utilization."));
60145449b1SDimitry Andric
61145449b1SDimitry Andric cl::opt<bool> ApplyExtTspWithoutProfile(
62145449b1SDimitry Andric "ext-tsp-apply-without-profile",
63145449b1SDimitry Andric cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
64145449b1SDimitry Andric cl::init(true), cl::Hidden);
657fa27ce4SDimitry Andric } // namespace llvm
66145449b1SDimitry Andric
67b1c73532SDimitry Andric // Algorithm-specific params for Ext-TSP. The values are tuned for the best
68b1c73532SDimitry Andric // performance of large-scale front-end bound binaries.
69e3b55780SDimitry Andric static cl::opt<double> ForwardWeightCond(
70e3b55780SDimitry Andric "ext-tsp-forward-weight-cond", cl::ReallyHidden, cl::init(0.1),
71e3b55780SDimitry Andric cl::desc("The weight of conditional forward jumps for ExtTSP value"));
7277fc4c14SDimitry Andric
73e3b55780SDimitry Andric static cl::opt<double> ForwardWeightUncond(
74e3b55780SDimitry Andric "ext-tsp-forward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
75e3b55780SDimitry Andric cl::desc("The weight of unconditional forward jumps for ExtTSP value"));
76e3b55780SDimitry Andric
77e3b55780SDimitry Andric static cl::opt<double> BackwardWeightCond(
78e3b55780SDimitry Andric "ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
797fa27ce4SDimitry Andric cl::desc("The weight of conditional backward jumps for ExtTSP value"));
80e3b55780SDimitry Andric
81e3b55780SDimitry Andric static cl::opt<double> BackwardWeightUncond(
82e3b55780SDimitry Andric "ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
837fa27ce4SDimitry Andric cl::desc("The weight of unconditional backward jumps for ExtTSP value"));
84e3b55780SDimitry Andric
85e3b55780SDimitry Andric static cl::opt<double> FallthroughWeightCond(
86e3b55780SDimitry Andric "ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
87e3b55780SDimitry Andric cl::desc("The weight of conditional fallthrough jumps for ExtTSP value"));
88e3b55780SDimitry Andric
89e3b55780SDimitry Andric static cl::opt<double> FallthroughWeightUncond(
90e3b55780SDimitry Andric "ext-tsp-fallthrough-weight-uncond", cl::ReallyHidden, cl::init(1.05),
91e3b55780SDimitry Andric cl::desc("The weight of unconditional fallthrough jumps for ExtTSP value"));
9277fc4c14SDimitry Andric
9377fc4c14SDimitry Andric static cl::opt<unsigned> ForwardDistance(
94e3b55780SDimitry Andric "ext-tsp-forward-distance", cl::ReallyHidden, cl::init(1024),
9577fc4c14SDimitry Andric cl::desc("The maximum distance (in bytes) of a forward jump for ExtTSP"));
9677fc4c14SDimitry Andric
9777fc4c14SDimitry Andric static cl::opt<unsigned> BackwardDistance(
98e3b55780SDimitry Andric "ext-tsp-backward-distance", cl::ReallyHidden, cl::init(640),
9977fc4c14SDimitry Andric cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
10077fc4c14SDimitry Andric
101145449b1SDimitry Andric // The maximum size of a chain created by the algorithm. The size is bounded
102b1c73532SDimitry Andric // so that the algorithm can efficiently process extremely large instances.
103145449b1SDimitry Andric static cl::opt<unsigned>
104b1c73532SDimitry Andric MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(512),
105b1c73532SDimitry Andric cl::desc("The maximum size of a chain to create"));
106145449b1SDimitry Andric
10777fc4c14SDimitry Andric // The maximum size of a chain for splitting. Larger values of the threshold
10877fc4c14SDimitry Andric // may yield better quality at the cost of worsen run-time.
10977fc4c14SDimitry Andric static cl::opt<unsigned> ChainSplitThreshold(
110e3b55780SDimitry Andric "ext-tsp-chain-split-threshold", cl::ReallyHidden, cl::init(128),
11177fc4c14SDimitry Andric cl::desc("The maximum size of a chain to apply splitting"));
11277fc4c14SDimitry Andric
113b1c73532SDimitry Andric // The maximum ratio between densities of two chains for merging.
114b1c73532SDimitry Andric static cl::opt<double> MaxMergeDensityRatio(
115b1c73532SDimitry Andric "ext-tsp-max-merge-density-ratio", cl::ReallyHidden, cl::init(100),
116b1c73532SDimitry Andric cl::desc("The maximum ratio between densities of two chains for merging"));
117b1c73532SDimitry Andric
118b1c73532SDimitry Andric // Algorithm-specific options for CDSort.
119b1c73532SDimitry Andric static cl::opt<unsigned> CacheEntries("cdsort-cache-entries", cl::ReallyHidden,
120b1c73532SDimitry Andric cl::desc("The size of the cache"));
121b1c73532SDimitry Andric
122b1c73532SDimitry Andric static cl::opt<unsigned> CacheSize("cdsort-cache-size", cl::ReallyHidden,
123b1c73532SDimitry Andric cl::desc("The size of a line in the cache"));
124b1c73532SDimitry Andric
125b1c73532SDimitry Andric static cl::opt<unsigned>
126b1c73532SDimitry Andric CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
127b1c73532SDimitry Andric cl::desc("The maximum size of a chain to create"));
128b1c73532SDimitry Andric
129b1c73532SDimitry Andric static cl::opt<double> DistancePower(
130b1c73532SDimitry Andric "cdsort-distance-power", cl::ReallyHidden,
131b1c73532SDimitry Andric cl::desc("The power exponent for the distance-based locality"));
132b1c73532SDimitry Andric
133b1c73532SDimitry Andric static cl::opt<double> FrequencyScale(
134b1c73532SDimitry Andric "cdsort-frequency-scale", cl::ReallyHidden,
135b1c73532SDimitry Andric cl::desc("The scale factor for the frequency-based locality"));
13677fc4c14SDimitry Andric
13777fc4c14SDimitry Andric namespace {
13877fc4c14SDimitry Andric
13977fc4c14SDimitry Andric // Epsilon for comparison of doubles.
14077fc4c14SDimitry Andric constexpr double EPS = 1e-8;
14177fc4c14SDimitry Andric
142e3b55780SDimitry Andric // Compute the Ext-TSP score for a given jump.
jumpExtTSPScore(uint64_t JumpDist,uint64_t JumpMaxDist,uint64_t Count,double Weight)143e3b55780SDimitry Andric double jumpExtTSPScore(uint64_t JumpDist, uint64_t JumpMaxDist, uint64_t Count,
144e3b55780SDimitry Andric double Weight) {
145e3b55780SDimitry Andric if (JumpDist > JumpMaxDist)
146e3b55780SDimitry Andric return 0;
147e3b55780SDimitry Andric double Prob = 1.0 - static_cast<double>(JumpDist) / JumpMaxDist;
148e3b55780SDimitry Andric return Weight * Prob * Count;
149e3b55780SDimitry Andric }
150e3b55780SDimitry Andric
15177fc4c14SDimitry Andric // Compute the Ext-TSP score for a jump between a given pair of blocks,
15277fc4c14SDimitry Andric // using their sizes, (estimated) addresses and the jump execution count.
extTSPScore(uint64_t SrcAddr,uint64_t SrcSize,uint64_t DstAddr,uint64_t Count,bool IsConditional)15377fc4c14SDimitry Andric double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
154e3b55780SDimitry Andric uint64_t Count, bool IsConditional) {
15577fc4c14SDimitry Andric // Fallthrough
15677fc4c14SDimitry Andric if (SrcAddr + SrcSize == DstAddr) {
157e3b55780SDimitry Andric return jumpExtTSPScore(0, 1, Count,
158e3b55780SDimitry Andric IsConditional ? FallthroughWeightCond
159e3b55780SDimitry Andric : FallthroughWeightUncond);
16077fc4c14SDimitry Andric }
16177fc4c14SDimitry Andric // Forward
16277fc4c14SDimitry Andric if (SrcAddr + SrcSize < DstAddr) {
163e3b55780SDimitry Andric const uint64_t Dist = DstAddr - (SrcAddr + SrcSize);
164e3b55780SDimitry Andric return jumpExtTSPScore(Dist, ForwardDistance, Count,
165e3b55780SDimitry Andric IsConditional ? ForwardWeightCond
166e3b55780SDimitry Andric : ForwardWeightUncond);
16777fc4c14SDimitry Andric }
16877fc4c14SDimitry Andric // Backward
169e3b55780SDimitry Andric const uint64_t Dist = SrcAddr + SrcSize - DstAddr;
170e3b55780SDimitry Andric return jumpExtTSPScore(Dist, BackwardDistance, Count,
171e3b55780SDimitry Andric IsConditional ? BackwardWeightCond
172e3b55780SDimitry Andric : BackwardWeightUncond);
17377fc4c14SDimitry Andric }
17477fc4c14SDimitry Andric
17577fc4c14SDimitry Andric /// A type of merging two chains, X and Y. The former chain is split into
17677fc4c14SDimitry Andric /// X1 and X2 and then concatenated with Y in the order specified by the type.
1777fa27ce4SDimitry Andric enum class MergeTypeT : int { X_Y, Y_X, X1_Y_X2, Y_X2_X1, X2_X1_Y };
17877fc4c14SDimitry Andric
17977fc4c14SDimitry Andric /// The gain of merging two chains, that is, the Ext-TSP score of the merge
1807fa27ce4SDimitry Andric /// together with the corresponding merge 'type' and 'offset'.
1817fa27ce4SDimitry Andric struct MergeGainT {
1827fa27ce4SDimitry Andric explicit MergeGainT() = default;
MergeGainT__anon0ffa47bc0111::MergeGainT1837fa27ce4SDimitry Andric explicit MergeGainT(double Score, size_t MergeOffset, MergeTypeT MergeType)
18477fc4c14SDimitry Andric : Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
18577fc4c14SDimitry Andric
score__anon0ffa47bc0111::MergeGainT18677fc4c14SDimitry Andric double score() const { return Score; }
18777fc4c14SDimitry Andric
mergeOffset__anon0ffa47bc0111::MergeGainT18877fc4c14SDimitry Andric size_t mergeOffset() const { return MergeOffset; }
18977fc4c14SDimitry Andric
mergeType__anon0ffa47bc0111::MergeGainT1907fa27ce4SDimitry Andric MergeTypeT mergeType() const { return MergeType; }
1917fa27ce4SDimitry Andric
setMergeType__anon0ffa47bc0111::MergeGainT1927fa27ce4SDimitry Andric void setMergeType(MergeTypeT Ty) { MergeType = Ty; }
19377fc4c14SDimitry Andric
19477fc4c14SDimitry Andric // Returns 'true' iff Other is preferred over this.
operator <__anon0ffa47bc0111::MergeGainT1957fa27ce4SDimitry Andric bool operator<(const MergeGainT &Other) const {
19677fc4c14SDimitry Andric return (Other.Score > EPS && Other.Score > Score + EPS);
19777fc4c14SDimitry Andric }
19877fc4c14SDimitry Andric
19977fc4c14SDimitry Andric // Update the current gain if Other is preferred over this.
updateIfLessThan__anon0ffa47bc0111::MergeGainT2007fa27ce4SDimitry Andric void updateIfLessThan(const MergeGainT &Other) {
20177fc4c14SDimitry Andric if (*this < Other)
20277fc4c14SDimitry Andric *this = Other;
20377fc4c14SDimitry Andric }
20477fc4c14SDimitry Andric
20577fc4c14SDimitry Andric private:
20677fc4c14SDimitry Andric double Score{-1.0};
20777fc4c14SDimitry Andric size_t MergeOffset{0};
2087fa27ce4SDimitry Andric MergeTypeT MergeType{MergeTypeT::X_Y};
20977fc4c14SDimitry Andric };
21077fc4c14SDimitry Andric
2117fa27ce4SDimitry Andric struct JumpT;
2127fa27ce4SDimitry Andric struct ChainT;
2137fa27ce4SDimitry Andric struct ChainEdge;
21477fc4c14SDimitry Andric
2157fa27ce4SDimitry Andric /// A node in the graph, typically corresponding to a basic block in the CFG or
2167fa27ce4SDimitry Andric /// a function in the call graph.
2177fa27ce4SDimitry Andric struct NodeT {
2187fa27ce4SDimitry Andric NodeT(const NodeT &) = delete;
2197fa27ce4SDimitry Andric NodeT(NodeT &&) = default;
2207fa27ce4SDimitry Andric NodeT &operator=(const NodeT &) = delete;
2217fa27ce4SDimitry Andric NodeT &operator=(NodeT &&) = default;
22277fc4c14SDimitry Andric
NodeT__anon0ffa47bc0111::NodeT223b1c73532SDimitry Andric explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
224b1c73532SDimitry Andric : Index(Index), Size(Size), ExecutionCount(Count) {}
2257fa27ce4SDimitry Andric
isEntry__anon0ffa47bc0111::NodeT22677fc4c14SDimitry Andric bool isEntry() const { return Index == 0; }
2277fa27ce4SDimitry Andric
228b1c73532SDimitry Andric // Check if Other is a successor of the node.
229b1c73532SDimitry Andric bool isSuccessor(const NodeT *Other) const;
230b1c73532SDimitry Andric
2317fa27ce4SDimitry Andric // The total execution count of outgoing jumps.
2327fa27ce4SDimitry Andric uint64_t outCount() const;
2337fa27ce4SDimitry Andric
2347fa27ce4SDimitry Andric // The total execution count of incoming jumps.
2357fa27ce4SDimitry Andric uint64_t inCount() const;
2367fa27ce4SDimitry Andric
2377fa27ce4SDimitry Andric // The original index of the node in graph.
2387fa27ce4SDimitry Andric size_t Index{0};
2397fa27ce4SDimitry Andric // The index of the node in the current chain.
2407fa27ce4SDimitry Andric size_t CurIndex{0};
2417fa27ce4SDimitry Andric // The size of the node in the binary.
2427fa27ce4SDimitry Andric uint64_t Size{0};
2437fa27ce4SDimitry Andric // The execution count of the node in the profile data.
2447fa27ce4SDimitry Andric uint64_t ExecutionCount{0};
2457fa27ce4SDimitry Andric // The current chain of the node.
2467fa27ce4SDimitry Andric ChainT *CurChain{nullptr};
2477fa27ce4SDimitry Andric // The offset of the node in the current chain.
2487fa27ce4SDimitry Andric mutable uint64_t EstimatedAddr{0};
2497fa27ce4SDimitry Andric // Forced successor of the node in the graph.
2507fa27ce4SDimitry Andric NodeT *ForcedSucc{nullptr};
2517fa27ce4SDimitry Andric // Forced predecessor of the node in the graph.
2527fa27ce4SDimitry Andric NodeT *ForcedPred{nullptr};
2537fa27ce4SDimitry Andric // Outgoing jumps from the node.
2547fa27ce4SDimitry Andric std::vector<JumpT *> OutJumps;
2557fa27ce4SDimitry Andric // Incoming jumps to the node.
2567fa27ce4SDimitry Andric std::vector<JumpT *> InJumps;
25777fc4c14SDimitry Andric };
25877fc4c14SDimitry Andric
2597fa27ce4SDimitry Andric /// An arc in the graph, typically corresponding to a jump between two nodes.
2607fa27ce4SDimitry Andric struct JumpT {
2617fa27ce4SDimitry Andric JumpT(const JumpT &) = delete;
2627fa27ce4SDimitry Andric JumpT(JumpT &&) = default;
2637fa27ce4SDimitry Andric JumpT &operator=(const JumpT &) = delete;
2647fa27ce4SDimitry Andric JumpT &operator=(JumpT &&) = default;
26577fc4c14SDimitry Andric
JumpT__anon0ffa47bc0111::JumpT2667fa27ce4SDimitry Andric explicit JumpT(NodeT *Source, NodeT *Target, uint64_t ExecutionCount)
2677fa27ce4SDimitry Andric : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
2687fa27ce4SDimitry Andric
2697fa27ce4SDimitry Andric // Source node of the jump.
2707fa27ce4SDimitry Andric NodeT *Source;
2717fa27ce4SDimitry Andric // Target node of the jump.
2727fa27ce4SDimitry Andric NodeT *Target;
27377fc4c14SDimitry Andric // Execution count of the arc in the profile data.
27477fc4c14SDimitry Andric uint64_t ExecutionCount{0};
275e3b55780SDimitry Andric // Whether the jump corresponds to a conditional branch.
276e3b55780SDimitry Andric bool IsConditional{false};
2777fa27ce4SDimitry Andric // The offset of the jump from the source node.
2787fa27ce4SDimitry Andric uint64_t Offset{0};
27977fc4c14SDimitry Andric };
28077fc4c14SDimitry Andric
2817fa27ce4SDimitry Andric /// A chain (ordered sequence) of nodes in the graph.
2827fa27ce4SDimitry Andric struct ChainT {
2837fa27ce4SDimitry Andric ChainT(const ChainT &) = delete;
2847fa27ce4SDimitry Andric ChainT(ChainT &&) = default;
2857fa27ce4SDimitry Andric ChainT &operator=(const ChainT &) = delete;
2867fa27ce4SDimitry Andric ChainT &operator=(ChainT &&) = default;
28777fc4c14SDimitry Andric
ChainT__anon0ffa47bc0111::ChainT2887fa27ce4SDimitry Andric explicit ChainT(uint64_t Id, NodeT *Node)
2897fa27ce4SDimitry Andric : Id(Id), ExecutionCount(Node->ExecutionCount), Size(Node->Size),
2907fa27ce4SDimitry Andric Nodes(1, Node) {}
29177fc4c14SDimitry Andric
numBlocks__anon0ffa47bc0111::ChainT2927fa27ce4SDimitry Andric size_t numBlocks() const { return Nodes.size(); }
29377fc4c14SDimitry Andric
density__anon0ffa47bc0111::ChainT294b1c73532SDimitry Andric double density() const { return ExecutionCount / Size; }
2957fa27ce4SDimitry Andric
isEntry__anon0ffa47bc0111::ChainT2967fa27ce4SDimitry Andric bool isEntry() const { return Nodes[0]->Index == 0; }
29777fc4c14SDimitry Andric
isCold__anon0ffa47bc0111::ChainT298e3b55780SDimitry Andric bool isCold() const {
2997fa27ce4SDimitry Andric for (NodeT *Node : Nodes) {
3007fa27ce4SDimitry Andric if (Node->ExecutionCount > 0)
301e3b55780SDimitry Andric return false;
302e3b55780SDimitry Andric }
303e3b55780SDimitry Andric return true;
304e3b55780SDimitry Andric }
305e3b55780SDimitry Andric
getEdge__anon0ffa47bc0111::ChainT3067fa27ce4SDimitry Andric ChainEdge *getEdge(ChainT *Other) const {
307b1c73532SDimitry Andric for (const auto &[Chain, ChainEdge] : Edges) {
308b1c73532SDimitry Andric if (Chain == Other)
309b1c73532SDimitry Andric return ChainEdge;
31077fc4c14SDimitry Andric }
31177fc4c14SDimitry Andric return nullptr;
31277fc4c14SDimitry Andric }
31377fc4c14SDimitry Andric
removeEdge__anon0ffa47bc0111::ChainT3147fa27ce4SDimitry Andric void removeEdge(ChainT *Other) {
31577fc4c14SDimitry Andric auto It = Edges.begin();
31677fc4c14SDimitry Andric while (It != Edges.end()) {
31777fc4c14SDimitry Andric if (It->first == Other) {
31877fc4c14SDimitry Andric Edges.erase(It);
31977fc4c14SDimitry Andric return;
32077fc4c14SDimitry Andric }
32177fc4c14SDimitry Andric It++;
32277fc4c14SDimitry Andric }
32377fc4c14SDimitry Andric }
32477fc4c14SDimitry Andric
addEdge__anon0ffa47bc0111::ChainT3257fa27ce4SDimitry Andric void addEdge(ChainT *Other, ChainEdge *Edge) {
32677fc4c14SDimitry Andric Edges.push_back(std::make_pair(Other, Edge));
32777fc4c14SDimitry Andric }
32877fc4c14SDimitry Andric
merge__anon0ffa47bc0111::ChainT329b1c73532SDimitry Andric void merge(ChainT *Other, std::vector<NodeT *> MergedBlocks) {
330b1c73532SDimitry Andric Nodes = std::move(MergedBlocks);
331b1c73532SDimitry Andric // Update the chain's data.
3327fa27ce4SDimitry Andric ExecutionCount += Other->ExecutionCount;
3337fa27ce4SDimitry Andric Size += Other->Size;
3347fa27ce4SDimitry Andric Id = Nodes[0]->Index;
335b1c73532SDimitry Andric // Update the node's data.
3367fa27ce4SDimitry Andric for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {
3377fa27ce4SDimitry Andric Nodes[Idx]->CurChain = this;
3387fa27ce4SDimitry Andric Nodes[Idx]->CurIndex = Idx;
33977fc4c14SDimitry Andric }
34077fc4c14SDimitry Andric }
34177fc4c14SDimitry Andric
3427fa27ce4SDimitry Andric void mergeEdges(ChainT *Other);
34377fc4c14SDimitry Andric
clear__anon0ffa47bc0111::ChainT34477fc4c14SDimitry Andric void clear() {
3457fa27ce4SDimitry Andric Nodes.clear();
3467fa27ce4SDimitry Andric Nodes.shrink_to_fit();
34777fc4c14SDimitry Andric Edges.clear();
34877fc4c14SDimitry Andric Edges.shrink_to_fit();
34977fc4c14SDimitry Andric }
35077fc4c14SDimitry Andric
35177fc4c14SDimitry Andric // Unique chain identifier.
35277fc4c14SDimitry Andric uint64_t Id;
35377fc4c14SDimitry Andric // Cached ext-tsp score for the chain.
3547fa27ce4SDimitry Andric double Score{0};
355b1c73532SDimitry Andric // The total execution count of the chain. Since the execution count of
356b1c73532SDimitry Andric // a basic block is uint64_t, using doubles here to avoid overflow.
357b1c73532SDimitry Andric double ExecutionCount{0};
3587fa27ce4SDimitry Andric // The total size of the chain.
3597fa27ce4SDimitry Andric uint64_t Size{0};
3607fa27ce4SDimitry Andric // Nodes of the chain.
3617fa27ce4SDimitry Andric std::vector<NodeT *> Nodes;
36277fc4c14SDimitry Andric // Adjacent chains and corresponding edges (lists of jumps).
3637fa27ce4SDimitry Andric std::vector<std::pair<ChainT *, ChainEdge *>> Edges;
36477fc4c14SDimitry Andric };
36577fc4c14SDimitry Andric
3667fa27ce4SDimitry Andric /// An edge in the graph representing jumps between two chains.
3677fa27ce4SDimitry Andric /// When nodes are merged into chains, the edges are combined too so that
368b1c73532SDimitry Andric /// there is always at most one edge between a pair of chains.
3697fa27ce4SDimitry Andric struct ChainEdge {
37077fc4c14SDimitry Andric ChainEdge(const ChainEdge &) = delete;
37177fc4c14SDimitry Andric ChainEdge(ChainEdge &&) = default;
37277fc4c14SDimitry Andric ChainEdge &operator=(const ChainEdge &) = delete;
3737fa27ce4SDimitry Andric ChainEdge &operator=(ChainEdge &&) = delete;
37477fc4c14SDimitry Andric
ChainEdge__anon0ffa47bc0111::ChainEdge3757fa27ce4SDimitry Andric explicit ChainEdge(JumpT *Jump)
37677fc4c14SDimitry Andric : SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
37777fc4c14SDimitry Andric Jumps(1, Jump) {}
37877fc4c14SDimitry Andric
srcChain__anon0ffa47bc0111::ChainEdge3797fa27ce4SDimitry Andric ChainT *srcChain() const { return SrcChain; }
38077fc4c14SDimitry Andric
dstChain__anon0ffa47bc0111::ChainEdge3817fa27ce4SDimitry Andric ChainT *dstChain() const { return DstChain; }
38277fc4c14SDimitry Andric
isSelfEdge__anon0ffa47bc0111::ChainEdge3837fa27ce4SDimitry Andric bool isSelfEdge() const { return SrcChain == DstChain; }
3847fa27ce4SDimitry Andric
jumps__anon0ffa47bc0111::ChainEdge3857fa27ce4SDimitry Andric const std::vector<JumpT *> &jumps() const { return Jumps; }
3867fa27ce4SDimitry Andric
appendJump__anon0ffa47bc0111::ChainEdge3877fa27ce4SDimitry Andric void appendJump(JumpT *Jump) { Jumps.push_back(Jump); }
38877fc4c14SDimitry Andric
moveJumps__anon0ffa47bc0111::ChainEdge38977fc4c14SDimitry Andric void moveJumps(ChainEdge *Other) {
39077fc4c14SDimitry Andric Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
39177fc4c14SDimitry Andric Other->Jumps.clear();
39277fc4c14SDimitry Andric Other->Jumps.shrink_to_fit();
39377fc4c14SDimitry Andric }
39477fc4c14SDimitry Andric
changeEndpoint__anon0ffa47bc0111::ChainEdge3957fa27ce4SDimitry Andric void changeEndpoint(ChainT *From, ChainT *To) {
3967fa27ce4SDimitry Andric if (From == SrcChain)
3977fa27ce4SDimitry Andric SrcChain = To;
3987fa27ce4SDimitry Andric if (From == DstChain)
3997fa27ce4SDimitry Andric DstChain = To;
4007fa27ce4SDimitry Andric }
4017fa27ce4SDimitry Andric
hasCachedMergeGain__anon0ffa47bc0111::ChainEdge4027fa27ce4SDimitry Andric bool hasCachedMergeGain(ChainT *Src, ChainT *Dst) const {
40377fc4c14SDimitry Andric return Src == SrcChain ? CacheValidForward : CacheValidBackward;
40477fc4c14SDimitry Andric }
40577fc4c14SDimitry Andric
getCachedMergeGain__anon0ffa47bc0111::ChainEdge4067fa27ce4SDimitry Andric MergeGainT getCachedMergeGain(ChainT *Src, ChainT *Dst) const {
40777fc4c14SDimitry Andric return Src == SrcChain ? CachedGainForward : CachedGainBackward;
40877fc4c14SDimitry Andric }
40977fc4c14SDimitry Andric
setCachedMergeGain__anon0ffa47bc0111::ChainEdge4107fa27ce4SDimitry Andric void setCachedMergeGain(ChainT *Src, ChainT *Dst, MergeGainT MergeGain) {
41177fc4c14SDimitry Andric if (Src == SrcChain) {
41277fc4c14SDimitry Andric CachedGainForward = MergeGain;
41377fc4c14SDimitry Andric CacheValidForward = true;
41477fc4c14SDimitry Andric } else {
41577fc4c14SDimitry Andric CachedGainBackward = MergeGain;
41677fc4c14SDimitry Andric CacheValidBackward = true;
41777fc4c14SDimitry Andric }
41877fc4c14SDimitry Andric }
41977fc4c14SDimitry Andric
invalidateCache__anon0ffa47bc0111::ChainEdge42077fc4c14SDimitry Andric void invalidateCache() {
42177fc4c14SDimitry Andric CacheValidForward = false;
42277fc4c14SDimitry Andric CacheValidBackward = false;
42377fc4c14SDimitry Andric }
42477fc4c14SDimitry Andric
setMergeGain__anon0ffa47bc0111::ChainEdge4257fa27ce4SDimitry Andric void setMergeGain(MergeGainT Gain) { CachedGain = Gain; }
4267fa27ce4SDimitry Andric
getMergeGain__anon0ffa47bc0111::ChainEdge4277fa27ce4SDimitry Andric MergeGainT getMergeGain() const { return CachedGain; }
4287fa27ce4SDimitry Andric
gain__anon0ffa47bc0111::ChainEdge4297fa27ce4SDimitry Andric double gain() const { return CachedGain.score(); }
4307fa27ce4SDimitry Andric
43177fc4c14SDimitry Andric private:
43277fc4c14SDimitry Andric // Source chain.
4337fa27ce4SDimitry Andric ChainT *SrcChain{nullptr};
43477fc4c14SDimitry Andric // Destination chain.
4357fa27ce4SDimitry Andric ChainT *DstChain{nullptr};
4367fa27ce4SDimitry Andric // Original jumps in the binary with corresponding execution counts.
4377fa27ce4SDimitry Andric std::vector<JumpT *> Jumps;
4387fa27ce4SDimitry Andric // Cached gain value for merging the pair of chains.
4397fa27ce4SDimitry Andric MergeGainT CachedGain;
4407fa27ce4SDimitry Andric
4417fa27ce4SDimitry Andric // Cached gain values for merging the pair of chains. Since the gain of
4427fa27ce4SDimitry Andric // merging (Src, Dst) and (Dst, Src) might be different, we store both values
4437fa27ce4SDimitry Andric // here and a flag indicating which of the options results in a higher gain.
4447fa27ce4SDimitry Andric // Cached gain values.
4457fa27ce4SDimitry Andric MergeGainT CachedGainForward;
4467fa27ce4SDimitry Andric MergeGainT CachedGainBackward;
44777fc4c14SDimitry Andric // Whether the cached value must be recomputed.
44877fc4c14SDimitry Andric bool CacheValidForward{false};
44977fc4c14SDimitry Andric bool CacheValidBackward{false};
45077fc4c14SDimitry Andric };
45177fc4c14SDimitry Andric
isSuccessor(const NodeT * Other) const452b1c73532SDimitry Andric bool NodeT::isSuccessor(const NodeT *Other) const {
453b1c73532SDimitry Andric for (JumpT *Jump : OutJumps)
454b1c73532SDimitry Andric if (Jump->Target == Other)
455b1c73532SDimitry Andric return true;
456b1c73532SDimitry Andric return false;
457b1c73532SDimitry Andric }
458b1c73532SDimitry Andric
outCount() const4597fa27ce4SDimitry Andric uint64_t NodeT::outCount() const {
4607fa27ce4SDimitry Andric uint64_t Count = 0;
461b1c73532SDimitry Andric for (JumpT *Jump : OutJumps)
4627fa27ce4SDimitry Andric Count += Jump->ExecutionCount;
4637fa27ce4SDimitry Andric return Count;
4647fa27ce4SDimitry Andric }
46577fc4c14SDimitry Andric
inCount() const4667fa27ce4SDimitry Andric uint64_t NodeT::inCount() const {
4677fa27ce4SDimitry Andric uint64_t Count = 0;
468b1c73532SDimitry Andric for (JumpT *Jump : InJumps)
4697fa27ce4SDimitry Andric Count += Jump->ExecutionCount;
4707fa27ce4SDimitry Andric return Count;
4717fa27ce4SDimitry Andric }
4727fa27ce4SDimitry Andric
mergeEdges(ChainT * Other)4737fa27ce4SDimitry Andric void ChainT::mergeEdges(ChainT *Other) {
474b1c73532SDimitry Andric // Update edges adjacent to chain Other.
475b1c73532SDimitry Andric for (const auto &[DstChain, DstEdge] : Other->Edges) {
4767fa27ce4SDimitry Andric ChainT *TargetChain = DstChain == Other ? this : DstChain;
477e3b55780SDimitry Andric ChainEdge *CurEdge = getEdge(TargetChain);
47877fc4c14SDimitry Andric if (CurEdge == nullptr) {
47977fc4c14SDimitry Andric DstEdge->changeEndpoint(Other, this);
48077fc4c14SDimitry Andric this->addEdge(TargetChain, DstEdge);
481b1c73532SDimitry Andric if (DstChain != this && DstChain != Other)
48277fc4c14SDimitry Andric DstChain->addEdge(this, DstEdge);
48377fc4c14SDimitry Andric } else {
48477fc4c14SDimitry Andric CurEdge->moveJumps(DstEdge);
48577fc4c14SDimitry Andric }
486b1c73532SDimitry Andric // Cleanup leftover edge.
487b1c73532SDimitry Andric if (DstChain != Other)
48877fc4c14SDimitry Andric DstChain->removeEdge(Other);
48977fc4c14SDimitry Andric }
49077fc4c14SDimitry Andric }
49177fc4c14SDimitry Andric
4927fa27ce4SDimitry Andric using NodeIter = std::vector<NodeT *>::const_iterator;
493b1c73532SDimitry Andric static std::vector<NodeT *> EmptyList;
49477fc4c14SDimitry Andric
495b1c73532SDimitry Andric /// A wrapper around three concatenated vectors (chains) of nodes; it is used
496b1c73532SDimitry Andric /// to avoid extra instantiation of the vectors.
497b1c73532SDimitry Andric struct MergedNodesT {
MergedNodesT__anon0ffa47bc0111::MergedNodesT498b1c73532SDimitry Andric MergedNodesT(NodeIter Begin1, NodeIter End1,
499b1c73532SDimitry Andric NodeIter Begin2 = EmptyList.begin(),
500b1c73532SDimitry Andric NodeIter End2 = EmptyList.end(),
501b1c73532SDimitry Andric NodeIter Begin3 = EmptyList.begin(),
502b1c73532SDimitry Andric NodeIter End3 = EmptyList.end())
50377fc4c14SDimitry Andric : Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
50477fc4c14SDimitry Andric End3(End3) {}
50577fc4c14SDimitry Andric
forEach__anon0ffa47bc0111::MergedNodesT50677fc4c14SDimitry Andric template <typename F> void forEach(const F &Func) const {
50777fc4c14SDimitry Andric for (auto It = Begin1; It != End1; It++)
50877fc4c14SDimitry Andric Func(*It);
50977fc4c14SDimitry Andric for (auto It = Begin2; It != End2; It++)
51077fc4c14SDimitry Andric Func(*It);
51177fc4c14SDimitry Andric for (auto It = Begin3; It != End3; It++)
51277fc4c14SDimitry Andric Func(*It);
51377fc4c14SDimitry Andric }
51477fc4c14SDimitry Andric
getNodes__anon0ffa47bc0111::MergedNodesT5157fa27ce4SDimitry Andric std::vector<NodeT *> getNodes() const {
5167fa27ce4SDimitry Andric std::vector<NodeT *> Result;
51777fc4c14SDimitry Andric Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
51877fc4c14SDimitry Andric std::distance(Begin3, End3));
51977fc4c14SDimitry Andric Result.insert(Result.end(), Begin1, End1);
52077fc4c14SDimitry Andric Result.insert(Result.end(), Begin2, End2);
52177fc4c14SDimitry Andric Result.insert(Result.end(), Begin3, End3);
52277fc4c14SDimitry Andric return Result;
52377fc4c14SDimitry Andric }
52477fc4c14SDimitry Andric
getFirstNode__anon0ffa47bc0111::MergedNodesT5257fa27ce4SDimitry Andric const NodeT *getFirstNode() const { return *Begin1; }
52677fc4c14SDimitry Andric
52777fc4c14SDimitry Andric private:
5287fa27ce4SDimitry Andric NodeIter Begin1;
5297fa27ce4SDimitry Andric NodeIter End1;
5307fa27ce4SDimitry Andric NodeIter Begin2;
5317fa27ce4SDimitry Andric NodeIter End2;
5327fa27ce4SDimitry Andric NodeIter Begin3;
5337fa27ce4SDimitry Andric NodeIter End3;
53477fc4c14SDimitry Andric };
53577fc4c14SDimitry Andric
536b1c73532SDimitry Andric /// A wrapper around two concatenated vectors (chains) of jumps.
537b1c73532SDimitry Andric struct MergedJumpsT {
MergedJumpsT__anon0ffa47bc0111::MergedJumpsT538b1c73532SDimitry Andric MergedJumpsT(const std::vector<JumpT *> *Jumps1,
539b1c73532SDimitry Andric const std::vector<JumpT *> *Jumps2 = nullptr) {
540b1c73532SDimitry Andric assert(!Jumps1->empty() && "cannot merge empty jump list");
541b1c73532SDimitry Andric JumpArray[0] = Jumps1;
542b1c73532SDimitry Andric JumpArray[1] = Jumps2;
543b1c73532SDimitry Andric }
544b1c73532SDimitry Andric
forEach__anon0ffa47bc0111::MergedJumpsT545b1c73532SDimitry Andric template <typename F> void forEach(const F &Func) const {
546b1c73532SDimitry Andric for (auto Jumps : JumpArray)
547b1c73532SDimitry Andric if (Jumps != nullptr)
548b1c73532SDimitry Andric for (JumpT *Jump : *Jumps)
549b1c73532SDimitry Andric Func(Jump);
550b1c73532SDimitry Andric }
551b1c73532SDimitry Andric
552b1c73532SDimitry Andric private:
553b1c73532SDimitry Andric std::array<const std::vector<JumpT *> *, 2> JumpArray{nullptr, nullptr};
554b1c73532SDimitry Andric };
555b1c73532SDimitry Andric
5567fa27ce4SDimitry Andric /// Merge two chains of nodes respecting a given 'type' and 'offset'.
5577fa27ce4SDimitry Andric ///
5587fa27ce4SDimitry Andric /// If MergeType == 0, then the result is a concatenation of two chains.
5597fa27ce4SDimitry Andric /// Otherwise, the first chain is cut into two sub-chains at the offset,
5607fa27ce4SDimitry Andric /// and merged using all possible ways of concatenating three chains.
mergeNodes(const std::vector<NodeT * > & X,const std::vector<NodeT * > & Y,size_t MergeOffset,MergeTypeT MergeType)561b1c73532SDimitry Andric MergedNodesT mergeNodes(const std::vector<NodeT *> &X,
5627fa27ce4SDimitry Andric const std::vector<NodeT *> &Y, size_t MergeOffset,
5637fa27ce4SDimitry Andric MergeTypeT MergeType) {
564b1c73532SDimitry Andric // Split the first chain, X, into X1 and X2.
5657fa27ce4SDimitry Andric NodeIter BeginX1 = X.begin();
5667fa27ce4SDimitry Andric NodeIter EndX1 = X.begin() + MergeOffset;
5677fa27ce4SDimitry Andric NodeIter BeginX2 = X.begin() + MergeOffset;
5687fa27ce4SDimitry Andric NodeIter EndX2 = X.end();
5697fa27ce4SDimitry Andric NodeIter BeginY = Y.begin();
5707fa27ce4SDimitry Andric NodeIter EndY = Y.end();
5717fa27ce4SDimitry Andric
572b1c73532SDimitry Andric // Construct a new chain from the three existing ones.
5737fa27ce4SDimitry Andric switch (MergeType) {
5747fa27ce4SDimitry Andric case MergeTypeT::X_Y:
575b1c73532SDimitry Andric return MergedNodesT(BeginX1, EndX2, BeginY, EndY);
5767fa27ce4SDimitry Andric case MergeTypeT::Y_X:
577b1c73532SDimitry Andric return MergedNodesT(BeginY, EndY, BeginX1, EndX2);
5787fa27ce4SDimitry Andric case MergeTypeT::X1_Y_X2:
579b1c73532SDimitry Andric return MergedNodesT(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
5807fa27ce4SDimitry Andric case MergeTypeT::Y_X2_X1:
581b1c73532SDimitry Andric return MergedNodesT(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
5827fa27ce4SDimitry Andric case MergeTypeT::X2_X1_Y:
583b1c73532SDimitry Andric return MergedNodesT(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
5847fa27ce4SDimitry Andric }
5857fa27ce4SDimitry Andric llvm_unreachable("unexpected chain merge type");
5867fa27ce4SDimitry Andric }
5877fa27ce4SDimitry Andric
58877fc4c14SDimitry Andric /// The implementation of the ExtTSP algorithm.
58977fc4c14SDimitry Andric class ExtTSPImpl {
59077fc4c14SDimitry Andric public:
ExtTSPImpl(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)591b1c73532SDimitry Andric ExtTSPImpl(ArrayRef<uint64_t> NodeSizes, ArrayRef<uint64_t> NodeCounts,
592b1c73532SDimitry Andric ArrayRef<EdgeCount> EdgeCounts)
5937fa27ce4SDimitry Andric : NumNodes(NodeSizes.size()) {
59477fc4c14SDimitry Andric initialize(NodeSizes, NodeCounts, EdgeCounts);
59577fc4c14SDimitry Andric }
59677fc4c14SDimitry Andric
5977fa27ce4SDimitry Andric /// Run the algorithm and return an optimized ordering of nodes.
run()598b1c73532SDimitry Andric std::vector<uint64_t> run() {
5997fa27ce4SDimitry Andric // Pass 1: Merge nodes with their mutually forced successors
60077fc4c14SDimitry Andric mergeForcedPairs();
60177fc4c14SDimitry Andric
60277fc4c14SDimitry Andric // Pass 2: Merge pairs of chains while improving the ExtTSP objective
60377fc4c14SDimitry Andric mergeChainPairs();
60477fc4c14SDimitry Andric
6057fa27ce4SDimitry Andric // Pass 3: Merge cold nodes to reduce code size
60677fc4c14SDimitry Andric mergeColdChains();
60777fc4c14SDimitry Andric
6087fa27ce4SDimitry Andric // Collect nodes from all chains
609b1c73532SDimitry Andric return concatChains();
61077fc4c14SDimitry Andric }
61177fc4c14SDimitry Andric
61277fc4c14SDimitry Andric private:
61377fc4c14SDimitry Andric /// Initialize the algorithm's data structures.
initialize(const ArrayRef<uint64_t> & NodeSizes,const ArrayRef<uint64_t> & NodeCounts,const ArrayRef<EdgeCount> & EdgeCounts)614b1c73532SDimitry Andric void initialize(const ArrayRef<uint64_t> &NodeSizes,
615b1c73532SDimitry Andric const ArrayRef<uint64_t> &NodeCounts,
616b1c73532SDimitry Andric const ArrayRef<EdgeCount> &EdgeCounts) {
617b1c73532SDimitry Andric // Initialize nodes.
6187fa27ce4SDimitry Andric AllNodes.reserve(NumNodes);
6197fa27ce4SDimitry Andric for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {
6207fa27ce4SDimitry Andric uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);
6217fa27ce4SDimitry Andric uint64_t ExecutionCount = NodeCounts[Idx];
622b1c73532SDimitry Andric // The execution count of the entry node is set to at least one.
6237fa27ce4SDimitry Andric if (Idx == 0 && ExecutionCount == 0)
62477fc4c14SDimitry Andric ExecutionCount = 1;
6257fa27ce4SDimitry Andric AllNodes.emplace_back(Idx, Size, ExecutionCount);
62677fc4c14SDimitry Andric }
62777fc4c14SDimitry Andric
628b1c73532SDimitry Andric // Initialize jumps between the nodes.
629e3b55780SDimitry Andric SuccNodes.resize(NumNodes);
630e3b55780SDimitry Andric PredNodes.resize(NumNodes);
631e3b55780SDimitry Andric std::vector<uint64_t> OutDegree(NumNodes, 0);
63277fc4c14SDimitry Andric AllJumps.reserve(EdgeCounts.size());
633b1c73532SDimitry Andric for (auto Edge : EdgeCounts) {
634b1c73532SDimitry Andric ++OutDegree[Edge.src];
635b1c73532SDimitry Andric // Ignore self-edges.
636b1c73532SDimitry Andric if (Edge.src == Edge.dst)
63777fc4c14SDimitry Andric continue;
63877fc4c14SDimitry Andric
639b1c73532SDimitry Andric SuccNodes[Edge.src].push_back(Edge.dst);
640b1c73532SDimitry Andric PredNodes[Edge.dst].push_back(Edge.src);
641b1c73532SDimitry Andric if (Edge.count > 0) {
642b1c73532SDimitry Andric NodeT &PredNode = AllNodes[Edge.src];
643b1c73532SDimitry Andric NodeT &SuccNode = AllNodes[Edge.dst];
644b1c73532SDimitry Andric AllJumps.emplace_back(&PredNode, &SuccNode, Edge.count);
6457fa27ce4SDimitry Andric SuccNode.InJumps.push_back(&AllJumps.back());
6467fa27ce4SDimitry Andric PredNode.OutJumps.push_back(&AllJumps.back());
647b1c73532SDimitry Andric // Adjust execution counts.
648b1c73532SDimitry Andric PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Edge.count);
649b1c73532SDimitry Andric SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Edge.count);
65077fc4c14SDimitry Andric }
65177fc4c14SDimitry Andric }
6527fa27ce4SDimitry Andric for (JumpT &Jump : AllJumps) {
653b1c73532SDimitry Andric assert(OutDegree[Jump.Source->Index] > 0 &&
654b1c73532SDimitry Andric "incorrectly computed out-degree of the block");
655e3b55780SDimitry Andric Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
656e3b55780SDimitry Andric }
65777fc4c14SDimitry Andric
658b1c73532SDimitry Andric // Initialize chains.
65977fc4c14SDimitry Andric AllChains.reserve(NumNodes);
66077fc4c14SDimitry Andric HotChains.reserve(NumNodes);
6617fa27ce4SDimitry Andric for (NodeT &Node : AllNodes) {
662b1c73532SDimitry Andric // Create a chain.
6637fa27ce4SDimitry Andric AllChains.emplace_back(Node.Index, &Node);
6647fa27ce4SDimitry Andric Node.CurChain = &AllChains.back();
665b1c73532SDimitry Andric if (Node.ExecutionCount > 0)
66677fc4c14SDimitry Andric HotChains.push_back(&AllChains.back());
66777fc4c14SDimitry Andric }
66877fc4c14SDimitry Andric
669b1c73532SDimitry Andric // Initialize chain edges.
67077fc4c14SDimitry Andric AllEdges.reserve(AllJumps.size());
6717fa27ce4SDimitry Andric for (NodeT &PredNode : AllNodes) {
6727fa27ce4SDimitry Andric for (JumpT *Jump : PredNode.OutJumps) {
673b1c73532SDimitry Andric assert(Jump->ExecutionCount > 0 && "incorrectly initialized jump");
6747fa27ce4SDimitry Andric NodeT *SuccNode = Jump->Target;
6757fa27ce4SDimitry Andric ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
676b1c73532SDimitry Andric // This edge is already present in the graph.
67777fc4c14SDimitry Andric if (CurEdge != nullptr) {
6787fa27ce4SDimitry Andric assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
67977fc4c14SDimitry Andric CurEdge->appendJump(Jump);
68077fc4c14SDimitry Andric continue;
68177fc4c14SDimitry Andric }
682b1c73532SDimitry Andric // This is a new edge.
68377fc4c14SDimitry Andric AllEdges.emplace_back(Jump);
6847fa27ce4SDimitry Andric PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
6857fa27ce4SDimitry Andric SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
68677fc4c14SDimitry Andric }
68777fc4c14SDimitry Andric }
68877fc4c14SDimitry Andric }
68977fc4c14SDimitry Andric
6907fa27ce4SDimitry Andric /// For a pair of nodes, A and B, node B is the forced successor of A,
69177fc4c14SDimitry Andric /// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps
6927fa27ce4SDimitry Andric /// to B are from A. Such nodes should be adjacent in the optimal ordering;
6937fa27ce4SDimitry Andric /// the method finds and merges such pairs of nodes.
mergeForcedPairs()69477fc4c14SDimitry Andric void mergeForcedPairs() {
695b1c73532SDimitry Andric // Find forced pairs of blocks.
6967fa27ce4SDimitry Andric for (NodeT &Node : AllNodes) {
6977fa27ce4SDimitry Andric if (SuccNodes[Node.Index].size() == 1 &&
6987fa27ce4SDimitry Andric PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
6997fa27ce4SDimitry Andric SuccNodes[Node.Index][0] != 0) {
7007fa27ce4SDimitry Andric size_t SuccIndex = SuccNodes[Node.Index][0];
7017fa27ce4SDimitry Andric Node.ForcedSucc = &AllNodes[SuccIndex];
7027fa27ce4SDimitry Andric AllNodes[SuccIndex].ForcedPred = &Node;
70377fc4c14SDimitry Andric }
70477fc4c14SDimitry Andric }
70577fc4c14SDimitry Andric
70677fc4c14SDimitry Andric // There might be 'cycles' in the forced dependencies, since profile
70777fc4c14SDimitry Andric // data isn't 100% accurate. Typically this is observed in loops, when the
70877fc4c14SDimitry Andric // loop edges are the hottest successors for the basic blocks of the loop.
7097fa27ce4SDimitry Andric // Break the cycles by choosing the node with the smallest index as the
71077fc4c14SDimitry Andric // head. This helps to keep the original order of the loops, which likely
71177fc4c14SDimitry Andric // have already been rotated in the optimized manner.
7127fa27ce4SDimitry Andric for (NodeT &Node : AllNodes) {
7137fa27ce4SDimitry Andric if (Node.ForcedSucc == nullptr || Node.ForcedPred == nullptr)
71477fc4c14SDimitry Andric continue;
71577fc4c14SDimitry Andric
7167fa27ce4SDimitry Andric NodeT *SuccNode = Node.ForcedSucc;
7177fa27ce4SDimitry Andric while (SuccNode != nullptr && SuccNode != &Node) {
7187fa27ce4SDimitry Andric SuccNode = SuccNode->ForcedSucc;
71977fc4c14SDimitry Andric }
7207fa27ce4SDimitry Andric if (SuccNode == nullptr)
72177fc4c14SDimitry Andric continue;
722b1c73532SDimitry Andric // Break the cycle.
7237fa27ce4SDimitry Andric AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;
7247fa27ce4SDimitry Andric Node.ForcedPred = nullptr;
72577fc4c14SDimitry Andric }
72677fc4c14SDimitry Andric
727b1c73532SDimitry Andric // Merge nodes with their fallthrough successors.
7287fa27ce4SDimitry Andric for (NodeT &Node : AllNodes) {
7297fa27ce4SDimitry Andric if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {
7307fa27ce4SDimitry Andric const NodeT *CurBlock = &Node;
73177fc4c14SDimitry Andric while (CurBlock->ForcedSucc != nullptr) {
7327fa27ce4SDimitry Andric const NodeT *NextBlock = CurBlock->ForcedSucc;
7337fa27ce4SDimitry Andric mergeChains(Node.CurChain, NextBlock->CurChain, 0, MergeTypeT::X_Y);
73477fc4c14SDimitry Andric CurBlock = NextBlock;
73577fc4c14SDimitry Andric }
73677fc4c14SDimitry Andric }
73777fc4c14SDimitry Andric }
73877fc4c14SDimitry Andric }
73977fc4c14SDimitry Andric
74077fc4c14SDimitry Andric /// Merge pairs of chains while improving the ExtTSP objective.
mergeChainPairs()74177fc4c14SDimitry Andric void mergeChainPairs() {
742b1c73532SDimitry Andric /// Deterministically compare pairs of chains.
7437fa27ce4SDimitry Andric auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
7447fa27ce4SDimitry Andric const ChainT *A2, const ChainT *B2) {
745b1c73532SDimitry Andric return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
74677fc4c14SDimitry Andric };
74777fc4c14SDimitry Andric
74877fc4c14SDimitry Andric while (HotChains.size() > 1) {
7497fa27ce4SDimitry Andric ChainT *BestChainPred = nullptr;
7507fa27ce4SDimitry Andric ChainT *BestChainSucc = nullptr;
7517fa27ce4SDimitry Andric MergeGainT BestGain;
752b1c73532SDimitry Andric // Iterate over all pairs of chains.
7537fa27ce4SDimitry Andric for (ChainT *ChainPred : HotChains) {
754b1c73532SDimitry Andric // Get candidates for merging with the current chain.
755b1c73532SDimitry Andric for (const auto &[ChainSucc, Edge] : ChainPred->Edges) {
756b1c73532SDimitry Andric // Ignore loop edges.
757b1c73532SDimitry Andric if (Edge->isSelfEdge())
75877fc4c14SDimitry Andric continue;
759b1c73532SDimitry Andric // Skip the merge if the combined chain violates the maximum specified
760b1c73532SDimitry Andric // size.
761145449b1SDimitry Andric if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize)
762145449b1SDimitry Andric continue;
763b1c73532SDimitry Andric // Don't merge the chains if they have vastly different densities.
764b1c73532SDimitry Andric // Skip the merge if the ratio between the densities exceeds
765b1c73532SDimitry Andric // MaxMergeDensityRatio. Smaller values of the option result in fewer
766b1c73532SDimitry Andric // merges, and hence, more chains.
767b1c73532SDimitry Andric const double ChainPredDensity = ChainPred->density();
768b1c73532SDimitry Andric const double ChainSuccDensity = ChainSucc->density();
769b1c73532SDimitry Andric assert(ChainPredDensity > 0.0 && ChainSuccDensity > 0.0 &&
770b1c73532SDimitry Andric "incorrectly computed chain densities");
771b1c73532SDimitry Andric auto [MinDensity, MaxDensity] =
772b1c73532SDimitry Andric std::minmax(ChainPredDensity, ChainSuccDensity);
773b1c73532SDimitry Andric const double Ratio = MaxDensity / MinDensity;
774b1c73532SDimitry Andric if (Ratio > MaxMergeDensityRatio)
775b1c73532SDimitry Andric continue;
776145449b1SDimitry Andric
777b1c73532SDimitry Andric // Compute the gain of merging the two chains.
7787fa27ce4SDimitry Andric MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
77977fc4c14SDimitry Andric if (CurGain.score() <= EPS)
78077fc4c14SDimitry Andric continue;
78177fc4c14SDimitry Andric
78277fc4c14SDimitry Andric if (BestGain < CurGain ||
78377fc4c14SDimitry Andric (std::abs(CurGain.score() - BestGain.score()) < EPS &&
78477fc4c14SDimitry Andric compareChainPairs(ChainPred, ChainSucc, BestChainPred,
78577fc4c14SDimitry Andric BestChainSucc))) {
78677fc4c14SDimitry Andric BestGain = CurGain;
78777fc4c14SDimitry Andric BestChainPred = ChainPred;
78877fc4c14SDimitry Andric BestChainSucc = ChainSucc;
78977fc4c14SDimitry Andric }
79077fc4c14SDimitry Andric }
79177fc4c14SDimitry Andric }
79277fc4c14SDimitry Andric
793b1c73532SDimitry Andric // Stop merging when there is no improvement.
79477fc4c14SDimitry Andric if (BestGain.score() <= EPS)
79577fc4c14SDimitry Andric break;
79677fc4c14SDimitry Andric
797b1c73532SDimitry Andric // Merge the best pair of chains.
79877fc4c14SDimitry Andric mergeChains(BestChainPred, BestChainSucc, BestGain.mergeOffset(),
79977fc4c14SDimitry Andric BestGain.mergeType());
80077fc4c14SDimitry Andric }
80177fc4c14SDimitry Andric }
80277fc4c14SDimitry Andric
8037fa27ce4SDimitry Andric /// Merge remaining nodes into chains w/o taking jump counts into
8047fa27ce4SDimitry Andric /// consideration. This allows to maintain the original node order in the
805b1c73532SDimitry Andric /// absence of profile data.
mergeColdChains()80677fc4c14SDimitry Andric void mergeColdChains() {
80777fc4c14SDimitry Andric for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
808e3b55780SDimitry Andric // Iterating in reverse order to make sure original fallthrough jumps are
809e3b55780SDimitry Andric // merged first; this might be beneficial for code size.
81077fc4c14SDimitry Andric size_t NumSuccs = SuccNodes[SrcBB].size();
81177fc4c14SDimitry Andric for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
8127fa27ce4SDimitry Andric size_t DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
8137fa27ce4SDimitry Andric ChainT *SrcChain = AllNodes[SrcBB].CurChain;
8147fa27ce4SDimitry Andric ChainT *DstChain = AllNodes[DstBB].CurChain;
81577fc4c14SDimitry Andric if (SrcChain != DstChain && !DstChain->isEntry() &&
8167fa27ce4SDimitry Andric SrcChain->Nodes.back()->Index == SrcBB &&
8177fa27ce4SDimitry Andric DstChain->Nodes.front()->Index == DstBB &&
818e3b55780SDimitry Andric SrcChain->isCold() == DstChain->isCold()) {
8197fa27ce4SDimitry Andric mergeChains(SrcChain, DstChain, 0, MergeTypeT::X_Y);
82077fc4c14SDimitry Andric }
82177fc4c14SDimitry Andric }
82277fc4c14SDimitry Andric }
82377fc4c14SDimitry Andric }
82477fc4c14SDimitry Andric
8257fa27ce4SDimitry Andric /// Compute the Ext-TSP score for a given node order and a list of jumps.
extTSPScore(const MergedNodesT & Nodes,const MergedJumpsT & Jumps) const826b1c73532SDimitry Andric double extTSPScore(const MergedNodesT &Nodes,
827b1c73532SDimitry Andric const MergedJumpsT &Jumps) const {
82877fc4c14SDimitry Andric uint64_t CurAddr = 0;
829b1c73532SDimitry Andric Nodes.forEach([&](const NodeT *Node) {
8307fa27ce4SDimitry Andric Node->EstimatedAddr = CurAddr;
8317fa27ce4SDimitry Andric CurAddr += Node->Size;
83277fc4c14SDimitry Andric });
83377fc4c14SDimitry Andric
83477fc4c14SDimitry Andric double Score = 0;
835b1c73532SDimitry Andric Jumps.forEach([&](const JumpT *Jump) {
8367fa27ce4SDimitry Andric const NodeT *SrcBlock = Jump->Source;
8377fa27ce4SDimitry Andric const NodeT *DstBlock = Jump->Target;
83877fc4c14SDimitry Andric Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
839e3b55780SDimitry Andric DstBlock->EstimatedAddr, Jump->ExecutionCount,
840e3b55780SDimitry Andric Jump->IsConditional);
841b1c73532SDimitry Andric });
84277fc4c14SDimitry Andric return Score;
84377fc4c14SDimitry Andric }
84477fc4c14SDimitry Andric
84577fc4c14SDimitry Andric /// Compute the gain of merging two chains.
84677fc4c14SDimitry Andric ///
84777fc4c14SDimitry Andric /// The function considers all possible ways of merging two chains and
84877fc4c14SDimitry Andric /// computes the one having the largest increase in ExtTSP objective. The
84977fc4c14SDimitry Andric /// result is a pair with the first element being the gain and the second
85077fc4c14SDimitry Andric /// element being the corresponding merging type.
getBestMergeGain(ChainT * ChainPred,ChainT * ChainSucc,ChainEdge * Edge) const8517fa27ce4SDimitry Andric MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
85277fc4c14SDimitry Andric ChainEdge *Edge) const {
853b1c73532SDimitry Andric if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
85477fc4c14SDimitry Andric return Edge->getCachedMergeGain(ChainPred, ChainSucc);
85577fc4c14SDimitry Andric
856b1c73532SDimitry Andric assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
857b1c73532SDimitry Andric // Precompute jumps between ChainPred and ChainSucc.
858e3b55780SDimitry Andric ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
859b1c73532SDimitry Andric MergedJumpsT Jumps(&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr);
86077fc4c14SDimitry Andric
861b1c73532SDimitry Andric // This object holds the best chosen gain of merging two chains.
8627fa27ce4SDimitry Andric MergeGainT Gain = MergeGainT();
86377fc4c14SDimitry Andric
86477fc4c14SDimitry Andric /// Given a merge offset and a list of merge types, try to merge two chains
865b1c73532SDimitry Andric /// and update Gain with a better alternative.
86677fc4c14SDimitry Andric auto tryChainMerging = [&](size_t Offset,
8677fa27ce4SDimitry Andric const std::vector<MergeTypeT> &MergeTypes) {
868b1c73532SDimitry Andric // Skip merging corresponding to concatenation w/o splitting.
8697fa27ce4SDimitry Andric if (Offset == 0 || Offset == ChainPred->Nodes.size())
87077fc4c14SDimitry Andric return;
871b1c73532SDimitry Andric // Skip merging if it breaks Forced successors.
8727fa27ce4SDimitry Andric NodeT *Node = ChainPred->Nodes[Offset - 1];
8737fa27ce4SDimitry Andric if (Node->ForcedSucc != nullptr)
87477fc4c14SDimitry Andric return;
87577fc4c14SDimitry Andric // Apply the merge, compute the corresponding gain, and update the best
876b1c73532SDimitry Andric // value, if the merge is beneficial.
8777fa27ce4SDimitry Andric for (const MergeTypeT &MergeType : MergeTypes) {
87877fc4c14SDimitry Andric Gain.updateIfLessThan(
87977fc4c14SDimitry Andric computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
88077fc4c14SDimitry Andric }
88177fc4c14SDimitry Andric };
88277fc4c14SDimitry Andric
883b1c73532SDimitry Andric // Try to concatenate two chains w/o splitting.
88477fc4c14SDimitry Andric Gain.updateIfLessThan(
8857fa27ce4SDimitry Andric computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));
88677fc4c14SDimitry Andric
887b1c73532SDimitry Andric // Attach (a part of) ChainPred before the first node of ChainSucc.
8887fa27ce4SDimitry Andric for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
8897fa27ce4SDimitry Andric const NodeT *SrcBlock = Jump->Source;
89077fc4c14SDimitry Andric if (SrcBlock->CurChain != ChainPred)
89177fc4c14SDimitry Andric continue;
89277fc4c14SDimitry Andric size_t Offset = SrcBlock->CurIndex + 1;
8937fa27ce4SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
89477fc4c14SDimitry Andric }
89577fc4c14SDimitry Andric
896b1c73532SDimitry Andric // Attach (a part of) ChainPred after the last node of ChainSucc.
8977fa27ce4SDimitry Andric for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
898b1c73532SDimitry Andric const NodeT *DstBlock = Jump->Target;
89977fc4c14SDimitry Andric if (DstBlock->CurChain != ChainPred)
90077fc4c14SDimitry Andric continue;
90177fc4c14SDimitry Andric size_t Offset = DstBlock->CurIndex;
9027fa27ce4SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
90377fc4c14SDimitry Andric }
90477fc4c14SDimitry Andric
905b1c73532SDimitry Andric // Try to break ChainPred in various ways and concatenate with ChainSucc.
9067fa27ce4SDimitry Andric if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
9077fa27ce4SDimitry Andric for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
908b1c73532SDimitry Andric // Do not split the chain along a fall-through jump. One of the two
909b1c73532SDimitry Andric // loops above may still "break" such a jump whenever it results in a
910b1c73532SDimitry Andric // new fall-through.
911b1c73532SDimitry Andric const NodeT *BB = ChainPred->Nodes[Offset - 1];
912b1c73532SDimitry Andric const NodeT *BB2 = ChainPred->Nodes[Offset];
913b1c73532SDimitry Andric if (BB->isSuccessor(BB2))
914b1c73532SDimitry Andric continue;
915b1c73532SDimitry Andric
916b1c73532SDimitry Andric // In practice, applying X2_Y_X1 merging almost never provides benefits;
917b1c73532SDimitry Andric // thus, we exclude it from consideration to reduce the search space.
9187fa27ce4SDimitry Andric tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
9197fa27ce4SDimitry Andric MergeTypeT::X2_X1_Y});
92077fc4c14SDimitry Andric }
92177fc4c14SDimitry Andric }
922b1c73532SDimitry Andric
92377fc4c14SDimitry Andric Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
92477fc4c14SDimitry Andric return Gain;
92577fc4c14SDimitry Andric }
92677fc4c14SDimitry Andric
92777fc4c14SDimitry Andric /// Compute the score gain of merging two chains, respecting a given
92877fc4c14SDimitry Andric /// merge 'type' and 'offset'.
92977fc4c14SDimitry Andric ///
93077fc4c14SDimitry Andric /// The two chains are not modified in the method.
computeMergeGain(const ChainT * ChainPred,const ChainT * ChainSucc,const MergedJumpsT & Jumps,size_t MergeOffset,MergeTypeT MergeType) const9317fa27ce4SDimitry Andric MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
932b1c73532SDimitry Andric const MergedJumpsT &Jumps, size_t MergeOffset,
933b1c73532SDimitry Andric MergeTypeT MergeType) const {
934b1c73532SDimitry Andric MergedNodesT MergedNodes =
9357fa27ce4SDimitry Andric mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
93677fc4c14SDimitry Andric
937b1c73532SDimitry Andric // Do not allow a merge that does not preserve the original entry point.
93877fc4c14SDimitry Andric if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
939b1c73532SDimitry Andric !MergedNodes.getFirstNode()->isEntry())
9407fa27ce4SDimitry Andric return MergeGainT();
94177fc4c14SDimitry Andric
942b1c73532SDimitry Andric // The gain for the new chain.
943b1c73532SDimitry Andric double NewScore = extTSPScore(MergedNodes, Jumps);
944b1c73532SDimitry Andric double CurScore = ChainPred->Score;
945b1c73532SDimitry Andric return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
94677fc4c14SDimitry Andric }
94777fc4c14SDimitry Andric
94877fc4c14SDimitry Andric /// Merge chain From into chain Into, update the list of active chains,
94977fc4c14SDimitry Andric /// adjacency information, and the corresponding cached values.
mergeChains(ChainT * Into,ChainT * From,size_t MergeOffset,MergeTypeT MergeType)9507fa27ce4SDimitry Andric void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
9517fa27ce4SDimitry Andric MergeTypeT MergeType) {
95277fc4c14SDimitry Andric assert(Into != From && "a chain cannot be merged with itself");
95377fc4c14SDimitry Andric
954b1c73532SDimitry Andric // Merge the nodes.
955b1c73532SDimitry Andric MergedNodesT MergedNodes =
9567fa27ce4SDimitry Andric mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
9577fa27ce4SDimitry Andric Into->merge(From, MergedNodes.getNodes());
9587fa27ce4SDimitry Andric
959b1c73532SDimitry Andric // Merge the edges.
96077fc4c14SDimitry Andric Into->mergeEdges(From);
96177fc4c14SDimitry Andric From->clear();
96277fc4c14SDimitry Andric
963b1c73532SDimitry Andric // Update cached ext-tsp score for the new chain.
964e3b55780SDimitry Andric ChainEdge *SelfEdge = Into->getEdge(Into);
96577fc4c14SDimitry Andric if (SelfEdge != nullptr) {
966b1c73532SDimitry Andric MergedNodes = MergedNodesT(Into->Nodes.begin(), Into->Nodes.end());
967b1c73532SDimitry Andric MergedJumpsT MergedJumps(&SelfEdge->jumps());
968b1c73532SDimitry Andric Into->Score = extTSPScore(MergedNodes, MergedJumps);
96977fc4c14SDimitry Andric }
97077fc4c14SDimitry Andric
971b1c73532SDimitry Andric // Remove the chain from the list of active chains.
972b1c73532SDimitry Andric llvm::erase(HotChains, From);
97377fc4c14SDimitry Andric
974b1c73532SDimitry Andric // Invalidate caches.
9757fa27ce4SDimitry Andric for (auto EdgeIt : Into->Edges)
9767fa27ce4SDimitry Andric EdgeIt.second->invalidateCache();
97777fc4c14SDimitry Andric }
97877fc4c14SDimitry Andric
9797fa27ce4SDimitry Andric /// Concatenate all chains into the final order.
concatChains()980b1c73532SDimitry Andric std::vector<uint64_t> concatChains() {
981b1c73532SDimitry Andric // Collect non-empty chains.
9827fa27ce4SDimitry Andric std::vector<const ChainT *> SortedChains;
9837fa27ce4SDimitry Andric for (ChainT &Chain : AllChains) {
984b1c73532SDimitry Andric if (!Chain.Nodes.empty())
98577fc4c14SDimitry Andric SortedChains.push_back(&Chain);
98677fc4c14SDimitry Andric }
98777fc4c14SDimitry Andric
988b1c73532SDimitry Andric // Sorting chains by density in the decreasing order.
989b1c73532SDimitry Andric std::sort(SortedChains.begin(), SortedChains.end(),
9907fa27ce4SDimitry Andric [&](const ChainT *L, const ChainT *R) {
991b1c73532SDimitry Andric // Place the entry point at the beginning of the order.
9927fa27ce4SDimitry Andric if (L->isEntry() != R->isEntry())
9937fa27ce4SDimitry Andric return L->isEntry();
99477fc4c14SDimitry Andric
995b1c73532SDimitry Andric // Compare by density and break ties by chain identifiers.
996b1c73532SDimitry Andric return std::make_tuple(-L->density(), L->Id) <
997b1c73532SDimitry Andric std::make_tuple(-R->density(), R->Id);
99877fc4c14SDimitry Andric });
99977fc4c14SDimitry Andric
1000b1c73532SDimitry Andric // Collect the nodes in the order specified by their chains.
1001b1c73532SDimitry Andric std::vector<uint64_t> Order;
100277fc4c14SDimitry Andric Order.reserve(NumNodes);
1003b1c73532SDimitry Andric for (const ChainT *Chain : SortedChains)
1004b1c73532SDimitry Andric for (NodeT *Node : Chain->Nodes)
10057fa27ce4SDimitry Andric Order.push_back(Node->Index);
1006b1c73532SDimitry Andric return Order;
100777fc4c14SDimitry Andric }
100877fc4c14SDimitry Andric
100977fc4c14SDimitry Andric private:
101077fc4c14SDimitry Andric /// The number of nodes in the graph.
101177fc4c14SDimitry Andric const size_t NumNodes;
101277fc4c14SDimitry Andric
101377fc4c14SDimitry Andric /// Successors of each node.
101477fc4c14SDimitry Andric std::vector<std::vector<uint64_t>> SuccNodes;
101577fc4c14SDimitry Andric
101677fc4c14SDimitry Andric /// Predecessors of each node.
101777fc4c14SDimitry Andric std::vector<std::vector<uint64_t>> PredNodes;
101877fc4c14SDimitry Andric
10197fa27ce4SDimitry Andric /// All nodes (basic blocks) in the graph.
10207fa27ce4SDimitry Andric std::vector<NodeT> AllNodes;
102177fc4c14SDimitry Andric
10227fa27ce4SDimitry Andric /// All jumps between the nodes.
10237fa27ce4SDimitry Andric std::vector<JumpT> AllJumps;
102477fc4c14SDimitry Andric
10257fa27ce4SDimitry Andric /// All chains of nodes.
10267fa27ce4SDimitry Andric std::vector<ChainT> AllChains;
102777fc4c14SDimitry Andric
10287fa27ce4SDimitry Andric /// All edges between the chains.
102977fc4c14SDimitry Andric std::vector<ChainEdge> AllEdges;
103077fc4c14SDimitry Andric
103177fc4c14SDimitry Andric /// Active chains. The vector gets updated at runtime when chains are merged.
10327fa27ce4SDimitry Andric std::vector<ChainT *> HotChains;
103377fc4c14SDimitry Andric };
103477fc4c14SDimitry Andric
1035b1c73532SDimitry Andric /// The implementation of the Cache-Directed Sort (CDSort) algorithm for
1036b1c73532SDimitry Andric /// ordering functions represented by a call graph.
1037b1c73532SDimitry Andric class CDSortImpl {
1038b1c73532SDimitry Andric public:
CDSortImpl(const CDSortConfig & Config,ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts,ArrayRef<uint64_t> EdgeOffsets)1039b1c73532SDimitry Andric CDSortImpl(const CDSortConfig &Config, ArrayRef<uint64_t> NodeSizes,
1040b1c73532SDimitry Andric ArrayRef<uint64_t> NodeCounts, ArrayRef<EdgeCount> EdgeCounts,
1041b1c73532SDimitry Andric ArrayRef<uint64_t> EdgeOffsets)
1042b1c73532SDimitry Andric : Config(Config), NumNodes(NodeSizes.size()) {
1043b1c73532SDimitry Andric initialize(NodeSizes, NodeCounts, EdgeCounts, EdgeOffsets);
1044b1c73532SDimitry Andric }
1045b1c73532SDimitry Andric
1046b1c73532SDimitry Andric /// Run the algorithm and return an ordered set of function clusters.
run()1047b1c73532SDimitry Andric std::vector<uint64_t> run() {
1048b1c73532SDimitry Andric // Merge pairs of chains while improving the objective.
1049b1c73532SDimitry Andric mergeChainPairs();
1050b1c73532SDimitry Andric
1051b1c73532SDimitry Andric // Collect nodes from all the chains.
1052b1c73532SDimitry Andric return concatChains();
1053b1c73532SDimitry Andric }
1054b1c73532SDimitry Andric
1055b1c73532SDimitry Andric private:
1056b1c73532SDimitry Andric /// Initialize the algorithm's data structures.
initialize(const ArrayRef<uint64_t> & NodeSizes,const ArrayRef<uint64_t> & NodeCounts,const ArrayRef<EdgeCount> & EdgeCounts,const ArrayRef<uint64_t> & EdgeOffsets)1057b1c73532SDimitry Andric void initialize(const ArrayRef<uint64_t> &NodeSizes,
1058b1c73532SDimitry Andric const ArrayRef<uint64_t> &NodeCounts,
1059b1c73532SDimitry Andric const ArrayRef<EdgeCount> &EdgeCounts,
1060b1c73532SDimitry Andric const ArrayRef<uint64_t> &EdgeOffsets) {
1061b1c73532SDimitry Andric // Initialize nodes.
1062b1c73532SDimitry Andric AllNodes.reserve(NumNodes);
1063b1c73532SDimitry Andric for (uint64_t Node = 0; Node < NumNodes; Node++) {
1064b1c73532SDimitry Andric uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
1065b1c73532SDimitry Andric uint64_t ExecutionCount = NodeCounts[Node];
1066b1c73532SDimitry Andric AllNodes.emplace_back(Node, Size, ExecutionCount);
1067b1c73532SDimitry Andric TotalSamples += ExecutionCount;
1068b1c73532SDimitry Andric if (ExecutionCount > 0)
1069b1c73532SDimitry Andric TotalSize += Size;
1070b1c73532SDimitry Andric }
1071b1c73532SDimitry Andric
1072b1c73532SDimitry Andric // Initialize jumps between the nodes.
1073b1c73532SDimitry Andric SuccNodes.resize(NumNodes);
1074b1c73532SDimitry Andric PredNodes.resize(NumNodes);
1075b1c73532SDimitry Andric AllJumps.reserve(EdgeCounts.size());
1076b1c73532SDimitry Andric for (size_t I = 0; I < EdgeCounts.size(); I++) {
1077b1c73532SDimitry Andric auto [Pred, Succ, Count] = EdgeCounts[I];
1078b1c73532SDimitry Andric // Ignore recursive calls.
1079b1c73532SDimitry Andric if (Pred == Succ)
1080b1c73532SDimitry Andric continue;
1081b1c73532SDimitry Andric
1082b1c73532SDimitry Andric SuccNodes[Pred].push_back(Succ);
1083b1c73532SDimitry Andric PredNodes[Succ].push_back(Pred);
1084b1c73532SDimitry Andric if (Count > 0) {
1085b1c73532SDimitry Andric NodeT &PredNode = AllNodes[Pred];
1086b1c73532SDimitry Andric NodeT &SuccNode = AllNodes[Succ];
1087b1c73532SDimitry Andric AllJumps.emplace_back(&PredNode, &SuccNode, Count);
1088b1c73532SDimitry Andric AllJumps.back().Offset = EdgeOffsets[I];
1089b1c73532SDimitry Andric SuccNode.InJumps.push_back(&AllJumps.back());
1090b1c73532SDimitry Andric PredNode.OutJumps.push_back(&AllJumps.back());
1091b1c73532SDimitry Andric // Adjust execution counts.
1092b1c73532SDimitry Andric PredNode.ExecutionCount = std::max(PredNode.ExecutionCount, Count);
1093b1c73532SDimitry Andric SuccNode.ExecutionCount = std::max(SuccNode.ExecutionCount, Count);
1094b1c73532SDimitry Andric }
1095b1c73532SDimitry Andric }
1096b1c73532SDimitry Andric
1097b1c73532SDimitry Andric // Initialize chains.
1098b1c73532SDimitry Andric AllChains.reserve(NumNodes);
1099b1c73532SDimitry Andric for (NodeT &Node : AllNodes) {
1100b1c73532SDimitry Andric // Adjust execution counts.
1101b1c73532SDimitry Andric Node.ExecutionCount = std::max(Node.ExecutionCount, Node.inCount());
1102b1c73532SDimitry Andric Node.ExecutionCount = std::max(Node.ExecutionCount, Node.outCount());
1103b1c73532SDimitry Andric // Create chain.
1104b1c73532SDimitry Andric AllChains.emplace_back(Node.Index, &Node);
1105b1c73532SDimitry Andric Node.CurChain = &AllChains.back();
1106b1c73532SDimitry Andric }
1107b1c73532SDimitry Andric
1108b1c73532SDimitry Andric // Initialize chain edges.
1109b1c73532SDimitry Andric AllEdges.reserve(AllJumps.size());
1110b1c73532SDimitry Andric for (NodeT &PredNode : AllNodes) {
1111b1c73532SDimitry Andric for (JumpT *Jump : PredNode.OutJumps) {
1112b1c73532SDimitry Andric NodeT *SuccNode = Jump->Target;
1113b1c73532SDimitry Andric ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
1114b1c73532SDimitry Andric // This edge is already present in the graph.
1115b1c73532SDimitry Andric if (CurEdge != nullptr) {
1116b1c73532SDimitry Andric assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
1117b1c73532SDimitry Andric CurEdge->appendJump(Jump);
1118b1c73532SDimitry Andric continue;
1119b1c73532SDimitry Andric }
1120b1c73532SDimitry Andric // This is a new edge.
1121b1c73532SDimitry Andric AllEdges.emplace_back(Jump);
1122b1c73532SDimitry Andric PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
1123b1c73532SDimitry Andric SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
1124b1c73532SDimitry Andric }
1125b1c73532SDimitry Andric }
1126b1c73532SDimitry Andric }
1127b1c73532SDimitry Andric
1128b1c73532SDimitry Andric /// Merge pairs of chains while there is an improvement in the objective.
mergeChainPairs()1129b1c73532SDimitry Andric void mergeChainPairs() {
1130b1c73532SDimitry Andric // Create a priority queue containing all edges ordered by the merge gain.
1131b1c73532SDimitry Andric auto GainComparator = [](ChainEdge *L, ChainEdge *R) {
1132b1c73532SDimitry Andric return std::make_tuple(-L->gain(), L->srcChain()->Id, L->dstChain()->Id) <
1133b1c73532SDimitry Andric std::make_tuple(-R->gain(), R->srcChain()->Id, R->dstChain()->Id);
1134b1c73532SDimitry Andric };
1135b1c73532SDimitry Andric std::set<ChainEdge *, decltype(GainComparator)> Queue(GainComparator);
1136b1c73532SDimitry Andric
1137b1c73532SDimitry Andric // Insert the edges into the queue.
1138b1c73532SDimitry Andric [[maybe_unused]] size_t NumActiveChains = 0;
1139b1c73532SDimitry Andric for (NodeT &Node : AllNodes) {
1140b1c73532SDimitry Andric if (Node.ExecutionCount == 0)
1141b1c73532SDimitry Andric continue;
1142b1c73532SDimitry Andric ++NumActiveChains;
1143b1c73532SDimitry Andric for (const auto &[_, Edge] : Node.CurChain->Edges) {
1144b1c73532SDimitry Andric // Ignore self-edges.
1145b1c73532SDimitry Andric if (Edge->isSelfEdge())
1146b1c73532SDimitry Andric continue;
1147b1c73532SDimitry Andric // Ignore already processed edges.
1148b1c73532SDimitry Andric if (Edge->gain() != -1.0)
1149b1c73532SDimitry Andric continue;
1150b1c73532SDimitry Andric
1151b1c73532SDimitry Andric // Compute the gain of merging the two chains.
1152b1c73532SDimitry Andric MergeGainT Gain = getBestMergeGain(Edge);
1153b1c73532SDimitry Andric Edge->setMergeGain(Gain);
1154b1c73532SDimitry Andric
1155b1c73532SDimitry Andric if (Edge->gain() > EPS)
1156b1c73532SDimitry Andric Queue.insert(Edge);
1157b1c73532SDimitry Andric }
1158b1c73532SDimitry Andric }
1159b1c73532SDimitry Andric
1160b1c73532SDimitry Andric // Merge the chains while the gain of merging is positive.
1161b1c73532SDimitry Andric while (!Queue.empty()) {
1162b1c73532SDimitry Andric // Extract the best (top) edge for merging.
1163b1c73532SDimitry Andric ChainEdge *BestEdge = *Queue.begin();
1164b1c73532SDimitry Andric Queue.erase(Queue.begin());
1165b1c73532SDimitry Andric ChainT *BestSrcChain = BestEdge->srcChain();
1166b1c73532SDimitry Andric ChainT *BestDstChain = BestEdge->dstChain();
1167b1c73532SDimitry Andric
1168b1c73532SDimitry Andric // Remove outdated edges from the queue.
1169b1c73532SDimitry Andric for (const auto &[_, ChainEdge] : BestSrcChain->Edges)
1170b1c73532SDimitry Andric Queue.erase(ChainEdge);
1171b1c73532SDimitry Andric for (const auto &[_, ChainEdge] : BestDstChain->Edges)
1172b1c73532SDimitry Andric Queue.erase(ChainEdge);
1173b1c73532SDimitry Andric
1174b1c73532SDimitry Andric // Merge the best pair of chains.
1175b1c73532SDimitry Andric MergeGainT BestGain = BestEdge->getMergeGain();
1176b1c73532SDimitry Andric mergeChains(BestSrcChain, BestDstChain, BestGain.mergeOffset(),
1177b1c73532SDimitry Andric BestGain.mergeType());
1178b1c73532SDimitry Andric --NumActiveChains;
1179b1c73532SDimitry Andric
1180b1c73532SDimitry Andric // Insert newly created edges into the queue.
1181b1c73532SDimitry Andric for (const auto &[_, Edge] : BestSrcChain->Edges) {
1182b1c73532SDimitry Andric // Ignore loop edges.
1183b1c73532SDimitry Andric if (Edge->isSelfEdge())
1184b1c73532SDimitry Andric continue;
1185b1c73532SDimitry Andric if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
1186b1c73532SDimitry Andric Config.MaxChainSize)
1187b1c73532SDimitry Andric continue;
1188b1c73532SDimitry Andric
1189b1c73532SDimitry Andric // Compute the gain of merging the two chains.
1190b1c73532SDimitry Andric MergeGainT Gain = getBestMergeGain(Edge);
1191b1c73532SDimitry Andric Edge->setMergeGain(Gain);
1192b1c73532SDimitry Andric
1193b1c73532SDimitry Andric if (Edge->gain() > EPS)
1194b1c73532SDimitry Andric Queue.insert(Edge);
1195b1c73532SDimitry Andric }
1196b1c73532SDimitry Andric }
1197b1c73532SDimitry Andric
1198b1c73532SDimitry Andric LLVM_DEBUG(dbgs() << "Cache-directed function sorting reduced the number"
1199b1c73532SDimitry Andric << " of chains from " << NumNodes << " to "
1200b1c73532SDimitry Andric << NumActiveChains << "\n");
1201b1c73532SDimitry Andric }
1202b1c73532SDimitry Andric
1203b1c73532SDimitry Andric /// Compute the gain of merging two chains.
1204b1c73532SDimitry Andric ///
1205b1c73532SDimitry Andric /// The function considers all possible ways of merging two chains and
1206b1c73532SDimitry Andric /// computes the one having the largest increase in ExtTSP objective. The
1207b1c73532SDimitry Andric /// result is a pair with the first element being the gain and the second
1208b1c73532SDimitry Andric /// element being the corresponding merging type.
getBestMergeGain(ChainEdge * Edge) const1209b1c73532SDimitry Andric MergeGainT getBestMergeGain(ChainEdge *Edge) const {
1210b1c73532SDimitry Andric assert(!Edge->jumps().empty() && "trying to merge chains w/o jumps");
1211b1c73532SDimitry Andric // Precompute jumps between ChainPred and ChainSucc.
1212b1c73532SDimitry Andric MergedJumpsT Jumps(&Edge->jumps());
1213b1c73532SDimitry Andric ChainT *SrcChain = Edge->srcChain();
1214b1c73532SDimitry Andric ChainT *DstChain = Edge->dstChain();
1215b1c73532SDimitry Andric
1216b1c73532SDimitry Andric // This object holds the best currently chosen gain of merging two chains.
1217b1c73532SDimitry Andric MergeGainT Gain = MergeGainT();
1218b1c73532SDimitry Andric
1219b1c73532SDimitry Andric /// Given a list of merge types, try to merge two chains and update Gain
1220b1c73532SDimitry Andric /// with a better alternative.
1221b1c73532SDimitry Andric auto tryChainMerging = [&](const std::vector<MergeTypeT> &MergeTypes) {
1222b1c73532SDimitry Andric // Apply the merge, compute the corresponding gain, and update the best
1223b1c73532SDimitry Andric // value, if the merge is beneficial.
1224b1c73532SDimitry Andric for (const MergeTypeT &MergeType : MergeTypes) {
1225b1c73532SDimitry Andric MergeGainT NewGain =
1226b1c73532SDimitry Andric computeMergeGain(SrcChain, DstChain, Jumps, MergeType);
1227b1c73532SDimitry Andric
1228b1c73532SDimitry Andric // When forward and backward gains are the same, prioritize merging that
1229b1c73532SDimitry Andric // preserves the original order of the functions in the binary.
1230b1c73532SDimitry Andric if (std::abs(Gain.score() - NewGain.score()) < EPS) {
1231b1c73532SDimitry Andric if ((MergeType == MergeTypeT::X_Y && SrcChain->Id < DstChain->Id) ||
1232b1c73532SDimitry Andric (MergeType == MergeTypeT::Y_X && SrcChain->Id > DstChain->Id)) {
1233b1c73532SDimitry Andric Gain = NewGain;
1234b1c73532SDimitry Andric }
1235b1c73532SDimitry Andric } else if (NewGain.score() > Gain.score() + EPS) {
1236b1c73532SDimitry Andric Gain = NewGain;
1237b1c73532SDimitry Andric }
1238b1c73532SDimitry Andric }
1239b1c73532SDimitry Andric };
1240b1c73532SDimitry Andric
1241b1c73532SDimitry Andric // Try to concatenate two chains w/o splitting.
1242b1c73532SDimitry Andric tryChainMerging({MergeTypeT::X_Y, MergeTypeT::Y_X});
1243b1c73532SDimitry Andric
1244b1c73532SDimitry Andric return Gain;
1245b1c73532SDimitry Andric }
1246b1c73532SDimitry Andric
1247b1c73532SDimitry Andric /// Compute the score gain of merging two chains, respecting a given type.
1248b1c73532SDimitry Andric ///
1249b1c73532SDimitry Andric /// The two chains are not modified in the method.
computeMergeGain(ChainT * ChainPred,ChainT * ChainSucc,const MergedJumpsT & Jumps,MergeTypeT MergeType) const1250b1c73532SDimitry Andric MergeGainT computeMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
1251b1c73532SDimitry Andric const MergedJumpsT &Jumps,
1252b1c73532SDimitry Andric MergeTypeT MergeType) const {
1253b1c73532SDimitry Andric // This doesn't depend on the ordering of the nodes
1254b1c73532SDimitry Andric double FreqGain = freqBasedLocalityGain(ChainPred, ChainSucc);
1255b1c73532SDimitry Andric
1256b1c73532SDimitry Andric // Merge offset is always 0, as the chains are not split.
1257b1c73532SDimitry Andric size_t MergeOffset = 0;
1258b1c73532SDimitry Andric auto MergedBlocks =
1259b1c73532SDimitry Andric mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
1260b1c73532SDimitry Andric double DistGain = distBasedLocalityGain(MergedBlocks, Jumps);
1261b1c73532SDimitry Andric
1262b1c73532SDimitry Andric double GainScore = DistGain + Config.FrequencyScale * FreqGain;
1263b1c73532SDimitry Andric // Scale the result to increase the importance of merging short chains.
1264b1c73532SDimitry Andric if (GainScore >= 0.0)
1265b1c73532SDimitry Andric GainScore /= std::min(ChainPred->Size, ChainSucc->Size);
1266b1c73532SDimitry Andric
1267b1c73532SDimitry Andric return MergeGainT(GainScore, MergeOffset, MergeType);
1268b1c73532SDimitry Andric }
1269b1c73532SDimitry Andric
1270b1c73532SDimitry Andric /// Compute the change of the frequency locality after merging the chains.
freqBasedLocalityGain(ChainT * ChainPred,ChainT * ChainSucc) const1271b1c73532SDimitry Andric double freqBasedLocalityGain(ChainT *ChainPred, ChainT *ChainSucc) const {
1272b1c73532SDimitry Andric auto missProbability = [&](double ChainDensity) {
1273b1c73532SDimitry Andric double PageSamples = ChainDensity * Config.CacheSize;
1274b1c73532SDimitry Andric if (PageSamples >= TotalSamples)
1275b1c73532SDimitry Andric return 0.0;
1276b1c73532SDimitry Andric double P = PageSamples / TotalSamples;
1277b1c73532SDimitry Andric return pow(1.0 - P, static_cast<double>(Config.CacheEntries));
1278b1c73532SDimitry Andric };
1279b1c73532SDimitry Andric
1280b1c73532SDimitry Andric // Cache misses on the chains before merging.
1281b1c73532SDimitry Andric double CurScore =
1282b1c73532SDimitry Andric ChainPred->ExecutionCount * missProbability(ChainPred->density()) +
1283b1c73532SDimitry Andric ChainSucc->ExecutionCount * missProbability(ChainSucc->density());
1284b1c73532SDimitry Andric
1285b1c73532SDimitry Andric // Cache misses on the merged chain
1286b1c73532SDimitry Andric double MergedCounts = ChainPred->ExecutionCount + ChainSucc->ExecutionCount;
1287b1c73532SDimitry Andric double MergedSize = ChainPred->Size + ChainSucc->Size;
1288b1c73532SDimitry Andric double MergedDensity = static_cast<double>(MergedCounts) / MergedSize;
1289b1c73532SDimitry Andric double NewScore = MergedCounts * missProbability(MergedDensity);
1290b1c73532SDimitry Andric
1291b1c73532SDimitry Andric return CurScore - NewScore;
1292b1c73532SDimitry Andric }
1293b1c73532SDimitry Andric
1294b1c73532SDimitry Andric /// Compute the distance locality for a jump / call.
distScore(uint64_t SrcAddr,uint64_t DstAddr,uint64_t Count) const1295b1c73532SDimitry Andric double distScore(uint64_t SrcAddr, uint64_t DstAddr, uint64_t Count) const {
1296b1c73532SDimitry Andric uint64_t Dist = SrcAddr <= DstAddr ? DstAddr - SrcAddr : SrcAddr - DstAddr;
1297b1c73532SDimitry Andric double D = Dist == 0 ? 0.1 : static_cast<double>(Dist);
1298b1c73532SDimitry Andric return static_cast<double>(Count) * std::pow(D, -Config.DistancePower);
1299b1c73532SDimitry Andric }
1300b1c73532SDimitry Andric
1301b1c73532SDimitry Andric /// Compute the change of the distance locality after merging the chains.
distBasedLocalityGain(const MergedNodesT & Nodes,const MergedJumpsT & Jumps) const1302b1c73532SDimitry Andric double distBasedLocalityGain(const MergedNodesT &Nodes,
1303b1c73532SDimitry Andric const MergedJumpsT &Jumps) const {
1304b1c73532SDimitry Andric uint64_t CurAddr = 0;
1305b1c73532SDimitry Andric Nodes.forEach([&](const NodeT *Node) {
1306b1c73532SDimitry Andric Node->EstimatedAddr = CurAddr;
1307b1c73532SDimitry Andric CurAddr += Node->Size;
1308b1c73532SDimitry Andric });
1309b1c73532SDimitry Andric
1310b1c73532SDimitry Andric double CurScore = 0;
1311b1c73532SDimitry Andric double NewScore = 0;
1312b1c73532SDimitry Andric Jumps.forEach([&](const JumpT *Jump) {
1313b1c73532SDimitry Andric uint64_t SrcAddr = Jump->Source->EstimatedAddr + Jump->Offset;
1314b1c73532SDimitry Andric uint64_t DstAddr = Jump->Target->EstimatedAddr;
1315b1c73532SDimitry Andric NewScore += distScore(SrcAddr, DstAddr, Jump->ExecutionCount);
1316b1c73532SDimitry Andric CurScore += distScore(0, TotalSize, Jump->ExecutionCount);
1317b1c73532SDimitry Andric });
1318b1c73532SDimitry Andric return NewScore - CurScore;
1319b1c73532SDimitry Andric }
1320b1c73532SDimitry Andric
1321b1c73532SDimitry Andric /// Merge chain From into chain Into, update the list of active chains,
1322b1c73532SDimitry Andric /// adjacency information, and the corresponding cached values.
mergeChains(ChainT * Into,ChainT * From,size_t MergeOffset,MergeTypeT MergeType)1323b1c73532SDimitry Andric void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
1324b1c73532SDimitry Andric MergeTypeT MergeType) {
1325b1c73532SDimitry Andric assert(Into != From && "a chain cannot be merged with itself");
1326b1c73532SDimitry Andric
1327b1c73532SDimitry Andric // Merge the nodes.
1328b1c73532SDimitry Andric MergedNodesT MergedNodes =
1329b1c73532SDimitry Andric mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
1330b1c73532SDimitry Andric Into->merge(From, MergedNodes.getNodes());
1331b1c73532SDimitry Andric
1332b1c73532SDimitry Andric // Merge the edges.
1333b1c73532SDimitry Andric Into->mergeEdges(From);
1334b1c73532SDimitry Andric From->clear();
1335b1c73532SDimitry Andric }
1336b1c73532SDimitry Andric
1337b1c73532SDimitry Andric /// Concatenate all chains into the final order.
concatChains()1338b1c73532SDimitry Andric std::vector<uint64_t> concatChains() {
1339b1c73532SDimitry Andric // Collect chains and calculate density stats for their sorting.
1340b1c73532SDimitry Andric std::vector<const ChainT *> SortedChains;
1341b1c73532SDimitry Andric DenseMap<const ChainT *, double> ChainDensity;
1342b1c73532SDimitry Andric for (ChainT &Chain : AllChains) {
1343b1c73532SDimitry Andric if (!Chain.Nodes.empty()) {
1344b1c73532SDimitry Andric SortedChains.push_back(&Chain);
1345b1c73532SDimitry Andric // Using doubles to avoid overflow of ExecutionCounts.
1346b1c73532SDimitry Andric double Size = 0;
1347b1c73532SDimitry Andric double ExecutionCount = 0;
1348b1c73532SDimitry Andric for (NodeT *Node : Chain.Nodes) {
1349b1c73532SDimitry Andric Size += static_cast<double>(Node->Size);
1350b1c73532SDimitry Andric ExecutionCount += static_cast<double>(Node->ExecutionCount);
1351b1c73532SDimitry Andric }
1352b1c73532SDimitry Andric assert(Size > 0 && "a chain of zero size");
1353b1c73532SDimitry Andric ChainDensity[&Chain] = ExecutionCount / Size;
1354b1c73532SDimitry Andric }
1355b1c73532SDimitry Andric }
1356b1c73532SDimitry Andric
1357b1c73532SDimitry Andric // Sort chains by density in the decreasing order.
1358b1c73532SDimitry Andric std::sort(SortedChains.begin(), SortedChains.end(),
1359b1c73532SDimitry Andric [&](const ChainT *L, const ChainT *R) {
1360b1c73532SDimitry Andric const double DL = ChainDensity[L];
1361b1c73532SDimitry Andric const double DR = ChainDensity[R];
1362b1c73532SDimitry Andric // Compare by density and break ties by chain identifiers.
1363b1c73532SDimitry Andric return std::make_tuple(-DL, L->Id) <
1364b1c73532SDimitry Andric std::make_tuple(-DR, R->Id);
1365b1c73532SDimitry Andric });
1366b1c73532SDimitry Andric
1367b1c73532SDimitry Andric // Collect the nodes in the order specified by their chains.
1368b1c73532SDimitry Andric std::vector<uint64_t> Order;
1369b1c73532SDimitry Andric Order.reserve(NumNodes);
1370b1c73532SDimitry Andric for (const ChainT *Chain : SortedChains)
1371b1c73532SDimitry Andric for (NodeT *Node : Chain->Nodes)
1372b1c73532SDimitry Andric Order.push_back(Node->Index);
1373b1c73532SDimitry Andric return Order;
1374b1c73532SDimitry Andric }
1375b1c73532SDimitry Andric
1376b1c73532SDimitry Andric private:
1377b1c73532SDimitry Andric /// Config for the algorithm.
1378b1c73532SDimitry Andric const CDSortConfig Config;
1379b1c73532SDimitry Andric
1380b1c73532SDimitry Andric /// The number of nodes in the graph.
1381b1c73532SDimitry Andric const size_t NumNodes;
1382b1c73532SDimitry Andric
1383b1c73532SDimitry Andric /// Successors of each node.
1384b1c73532SDimitry Andric std::vector<std::vector<uint64_t>> SuccNodes;
1385b1c73532SDimitry Andric
1386b1c73532SDimitry Andric /// Predecessors of each node.
1387b1c73532SDimitry Andric std::vector<std::vector<uint64_t>> PredNodes;
1388b1c73532SDimitry Andric
1389b1c73532SDimitry Andric /// All nodes (functions) in the graph.
1390b1c73532SDimitry Andric std::vector<NodeT> AllNodes;
1391b1c73532SDimitry Andric
1392b1c73532SDimitry Andric /// All jumps (function calls) between the nodes.
1393b1c73532SDimitry Andric std::vector<JumpT> AllJumps;
1394b1c73532SDimitry Andric
1395b1c73532SDimitry Andric /// All chains of nodes.
1396b1c73532SDimitry Andric std::vector<ChainT> AllChains;
1397b1c73532SDimitry Andric
1398b1c73532SDimitry Andric /// All edges between the chains.
1399b1c73532SDimitry Andric std::vector<ChainEdge> AllEdges;
1400b1c73532SDimitry Andric
1401b1c73532SDimitry Andric /// The total number of samples in the graph.
1402b1c73532SDimitry Andric uint64_t TotalSamples{0};
1403b1c73532SDimitry Andric
1404b1c73532SDimitry Andric /// The total size of the nodes in the graph.
1405b1c73532SDimitry Andric uint64_t TotalSize{0};
1406b1c73532SDimitry Andric };
1407b1c73532SDimitry Andric
140877fc4c14SDimitry Andric } // end of anonymous namespace
140977fc4c14SDimitry Andric
14107fa27ce4SDimitry Andric std::vector<uint64_t>
computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1411b1c73532SDimitry Andric codelayout::computeExtTspLayout(ArrayRef<uint64_t> NodeSizes,
1412b1c73532SDimitry Andric ArrayRef<uint64_t> NodeCounts,
1413b1c73532SDimitry Andric ArrayRef<EdgeCount> EdgeCounts) {
1414b1c73532SDimitry Andric // Verify correctness of the input data.
141577fc4c14SDimitry Andric assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
14167fa27ce4SDimitry Andric assert(NodeSizes.size() > 2 && "Incorrect input");
141777fc4c14SDimitry Andric
1418b1c73532SDimitry Andric // Apply the reordering algorithm.
14197fa27ce4SDimitry Andric ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);
1420b1c73532SDimitry Andric std::vector<uint64_t> Result = Alg.run();
142177fc4c14SDimitry Andric
1422b1c73532SDimitry Andric // Verify correctness of the output.
142377fc4c14SDimitry Andric assert(Result.front() == 0 && "Original entry point is not preserved");
14247fa27ce4SDimitry Andric assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");
142577fc4c14SDimitry Andric return Result;
142677fc4c14SDimitry Andric }
142777fc4c14SDimitry Andric
calcExtTspScore(ArrayRef<uint64_t> Order,ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1428b1c73532SDimitry Andric double codelayout::calcExtTspScore(ArrayRef<uint64_t> Order,
1429b1c73532SDimitry Andric ArrayRef<uint64_t> NodeSizes,
1430b1c73532SDimitry Andric ArrayRef<uint64_t> NodeCounts,
1431b1c73532SDimitry Andric ArrayRef<EdgeCount> EdgeCounts) {
1432b1c73532SDimitry Andric // Estimate addresses of the blocks in memory.
1433e3b55780SDimitry Andric std::vector<uint64_t> Addr(NodeSizes.size(), 0);
143477fc4c14SDimitry Andric for (size_t Idx = 1; Idx < Order.size(); Idx++) {
143577fc4c14SDimitry Andric Addr[Order[Idx]] = Addr[Order[Idx - 1]] + NodeSizes[Order[Idx - 1]];
143677fc4c14SDimitry Andric }
1437e3b55780SDimitry Andric std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
1438b1c73532SDimitry Andric for (auto Edge : EdgeCounts)
1439b1c73532SDimitry Andric ++OutDegree[Edge.src];
144077fc4c14SDimitry Andric
1441b1c73532SDimitry Andric // Increase the score for each jump.
144277fc4c14SDimitry Andric double Score = 0;
1443b1c73532SDimitry Andric for (auto Edge : EdgeCounts) {
1444b1c73532SDimitry Andric bool IsConditional = OutDegree[Edge.src] > 1;
1445b1c73532SDimitry Andric Score += ::extTSPScore(Addr[Edge.src], NodeSizes[Edge.src], Addr[Edge.dst],
1446b1c73532SDimitry Andric Edge.count, IsConditional);
144777fc4c14SDimitry Andric }
144877fc4c14SDimitry Andric return Score;
144977fc4c14SDimitry Andric }
145077fc4c14SDimitry Andric
calcExtTspScore(ArrayRef<uint64_t> NodeSizes,ArrayRef<uint64_t> NodeCounts,ArrayRef<EdgeCount> EdgeCounts)1451b1c73532SDimitry Andric double codelayout::calcExtTspScore(ArrayRef<uint64_t> NodeSizes,
1452b1c73532SDimitry Andric ArrayRef<uint64_t> NodeCounts,
1453b1c73532SDimitry Andric ArrayRef<EdgeCount> EdgeCounts) {
1454e3b55780SDimitry Andric std::vector<uint64_t> Order(NodeSizes.size());
145577fc4c14SDimitry Andric for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
145677fc4c14SDimitry Andric Order[Idx] = Idx;
145777fc4c14SDimitry Andric }
145877fc4c14SDimitry Andric return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts);
145977fc4c14SDimitry Andric }
1460b1c73532SDimitry Andric
computeCacheDirectedLayout(const CDSortConfig & Config,ArrayRef<uint64_t> FuncSizes,ArrayRef<uint64_t> FuncCounts,ArrayRef<EdgeCount> CallCounts,ArrayRef<uint64_t> CallOffsets)1461b1c73532SDimitry Andric std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
1462b1c73532SDimitry Andric const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes,
1463b1c73532SDimitry Andric ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts,
1464b1c73532SDimitry Andric ArrayRef<uint64_t> CallOffsets) {
1465b1c73532SDimitry Andric // Verify correctness of the input data.
1466b1c73532SDimitry Andric assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input");
1467b1c73532SDimitry Andric
1468b1c73532SDimitry Andric // Apply the reordering algorithm.
1469b1c73532SDimitry Andric CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets);
1470b1c73532SDimitry Andric std::vector<uint64_t> Result = Alg.run();
1471b1c73532SDimitry Andric assert(Result.size() == FuncSizes.size() && "Incorrect size of layout");
1472b1c73532SDimitry Andric return Result;
1473b1c73532SDimitry Andric }
1474b1c73532SDimitry Andric
computeCacheDirectedLayout(ArrayRef<uint64_t> FuncSizes,ArrayRef<uint64_t> FuncCounts,ArrayRef<EdgeCount> CallCounts,ArrayRef<uint64_t> CallOffsets)1475b1c73532SDimitry Andric std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
1476b1c73532SDimitry Andric ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts,
1477b1c73532SDimitry Andric ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets) {
1478b1c73532SDimitry Andric CDSortConfig Config;
1479b1c73532SDimitry Andric // Populate the config from the command-line options.
1480b1c73532SDimitry Andric if (CacheEntries.getNumOccurrences() > 0)
1481b1c73532SDimitry Andric Config.CacheEntries = CacheEntries;
1482b1c73532SDimitry Andric if (CacheSize.getNumOccurrences() > 0)
1483b1c73532SDimitry Andric Config.CacheSize = CacheSize;
1484b1c73532SDimitry Andric if (CDMaxChainSize.getNumOccurrences() > 0)
1485b1c73532SDimitry Andric Config.MaxChainSize = CDMaxChainSize;
1486b1c73532SDimitry Andric if (DistancePower.getNumOccurrences() > 0)
1487b1c73532SDimitry Andric Config.DistancePower = DistancePower;
1488b1c73532SDimitry Andric if (FrequencyScale.getNumOccurrences() > 0)
1489b1c73532SDimitry Andric Config.FrequencyScale = FrequencyScale;
1490b1c73532SDimitry Andric return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts,
1491b1c73532SDimitry Andric CallOffsets);
1492b1c73532SDimitry Andric }
1493