1ac9a064cSDimitry Andric //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2ac9a064cSDimitry Andric //
3ac9a064cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ac9a064cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5ac9a064cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ac9a064cSDimitry Andric //
7ac9a064cSDimitry Andric //===----------------------------------------------------------------------===//
8ac9a064cSDimitry Andric //
9ac9a064cSDimitry Andric // This file implements the SampleProfileMatcher used for stale
10ac9a064cSDimitry Andric // profile matching.
11ac9a064cSDimitry Andric //
12ac9a064cSDimitry Andric //===----------------------------------------------------------------------===//
13ac9a064cSDimitry Andric
14ac9a064cSDimitry Andric #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15ac9a064cSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
16ac9a064cSDimitry Andric #include "llvm/IR/MDBuilder.h"
17ac9a064cSDimitry Andric #include "llvm/Support/CommandLine.h"
18ac9a064cSDimitry Andric
19ac9a064cSDimitry Andric using namespace llvm;
20ac9a064cSDimitry Andric using namespace sampleprof;
21ac9a064cSDimitry Andric
22ac9a064cSDimitry Andric #define DEBUG_TYPE "sample-profile-matcher"
23ac9a064cSDimitry Andric
24ac9a064cSDimitry Andric static cl::opt<unsigned> FuncProfileSimilarityThreshold(
25ac9a064cSDimitry Andric "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
26ac9a064cSDimitry Andric cl::desc("Consider a profile matches a function if the similarity of their "
27ac9a064cSDimitry Andric "callee sequences is above the specified percentile."));
28ac9a064cSDimitry Andric
29ac9a064cSDimitry Andric static cl::opt<unsigned> MinFuncCountForCGMatching(
30ac9a064cSDimitry Andric "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
31ac9a064cSDimitry Andric cl::desc("The minimum number of basic blocks required for a function to "
32ac9a064cSDimitry Andric "run stale profile call graph matching."));
33ac9a064cSDimitry Andric
34ac9a064cSDimitry Andric static cl::opt<unsigned> MinCallCountForCGMatching(
35ac9a064cSDimitry Andric "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
36ac9a064cSDimitry Andric cl::desc("The minimum number of call anchors required for a function to "
37ac9a064cSDimitry Andric "run stale profile call graph matching."));
38ac9a064cSDimitry Andric
39ac9a064cSDimitry Andric extern cl::opt<bool> SalvageStaleProfile;
40ac9a064cSDimitry Andric extern cl::opt<bool> SalvageUnusedProfile;
41ac9a064cSDimitry Andric extern cl::opt<bool> PersistProfileStaleness;
42ac9a064cSDimitry Andric extern cl::opt<bool> ReportProfileStaleness;
43ac9a064cSDimitry Andric
44ac9a064cSDimitry Andric static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
45ac9a064cSDimitry Andric "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
46ac9a064cSDimitry Andric cl::desc("The maximum number of callsites in a function, above which stale "
47ac9a064cSDimitry Andric "profile matching will be skipped."));
48ac9a064cSDimitry Andric
findIRAnchors(const Function & F,AnchorMap & IRAnchors) const49ac9a064cSDimitry Andric void SampleProfileMatcher::findIRAnchors(const Function &F,
50ac9a064cSDimitry Andric AnchorMap &IRAnchors) const {
51ac9a064cSDimitry Andric // For inlined code, recover the original callsite and callee by finding the
52ac9a064cSDimitry Andric // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
53ac9a064cSDimitry Andric // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
54ac9a064cSDimitry Andric auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
55ac9a064cSDimitry Andric assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
56ac9a064cSDimitry Andric const DILocation *PrevDIL = nullptr;
57ac9a064cSDimitry Andric do {
58ac9a064cSDimitry Andric PrevDIL = DIL;
59ac9a064cSDimitry Andric DIL = DIL->getInlinedAt();
60ac9a064cSDimitry Andric } while (DIL->getInlinedAt());
61ac9a064cSDimitry Andric
62ac9a064cSDimitry Andric LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
63ac9a064cSDimitry Andric DIL, FunctionSamples::ProfileIsFS);
64ac9a064cSDimitry Andric StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
65ac9a064cSDimitry Andric return std::make_pair(Callsite, FunctionId(CalleeName));
66ac9a064cSDimitry Andric };
67ac9a064cSDimitry Andric
68ac9a064cSDimitry Andric auto GetCanonicalCalleeName = [](const CallBase *CB) {
69ac9a064cSDimitry Andric StringRef CalleeName = UnknownIndirectCallee;
70ac9a064cSDimitry Andric if (Function *Callee = CB->getCalledFunction())
71ac9a064cSDimitry Andric CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
72ac9a064cSDimitry Andric return CalleeName;
73ac9a064cSDimitry Andric };
74ac9a064cSDimitry Andric
75ac9a064cSDimitry Andric // Extract profile matching anchors in the IR.
76ac9a064cSDimitry Andric for (auto &BB : F) {
77ac9a064cSDimitry Andric for (auto &I : BB) {
78ac9a064cSDimitry Andric DILocation *DIL = I.getDebugLoc();
79ac9a064cSDimitry Andric if (!DIL)
80ac9a064cSDimitry Andric continue;
81ac9a064cSDimitry Andric
82ac9a064cSDimitry Andric if (FunctionSamples::ProfileIsProbeBased) {
83ac9a064cSDimitry Andric if (auto Probe = extractProbe(I)) {
84ac9a064cSDimitry Andric // Flatten inlined IR for the matching.
85ac9a064cSDimitry Andric if (DIL->getInlinedAt()) {
86ac9a064cSDimitry Andric IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
87ac9a064cSDimitry Andric } else {
88ac9a064cSDimitry Andric // Use empty StringRef for basic block probe.
89ac9a064cSDimitry Andric StringRef CalleeName;
90ac9a064cSDimitry Andric if (const auto *CB = dyn_cast<CallBase>(&I)) {
91ac9a064cSDimitry Andric // Skip the probe inst whose callee name is "llvm.pseudoprobe".
92ac9a064cSDimitry Andric if (!isa<IntrinsicInst>(&I))
93ac9a064cSDimitry Andric CalleeName = GetCanonicalCalleeName(CB);
94ac9a064cSDimitry Andric }
95ac9a064cSDimitry Andric LineLocation Loc = LineLocation(Probe->Id, 0);
96ac9a064cSDimitry Andric IRAnchors.emplace(Loc, FunctionId(CalleeName));
97ac9a064cSDimitry Andric }
98ac9a064cSDimitry Andric }
99ac9a064cSDimitry Andric } else {
100ac9a064cSDimitry Andric // TODO: For line-number based profile(AutoFDO), currently only support
101ac9a064cSDimitry Andric // find callsite anchors. In future, we need to parse all the non-call
102ac9a064cSDimitry Andric // instructions to extract the line locations for profile matching.
103ac9a064cSDimitry Andric if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
104ac9a064cSDimitry Andric continue;
105ac9a064cSDimitry Andric
106ac9a064cSDimitry Andric if (DIL->getInlinedAt()) {
107ac9a064cSDimitry Andric IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
108ac9a064cSDimitry Andric } else {
109ac9a064cSDimitry Andric LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
110ac9a064cSDimitry Andric DIL, FunctionSamples::ProfileIsFS);
111ac9a064cSDimitry Andric StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
112ac9a064cSDimitry Andric IRAnchors.emplace(Callsite, FunctionId(CalleeName));
113ac9a064cSDimitry Andric }
114ac9a064cSDimitry Andric }
115ac9a064cSDimitry Andric }
116ac9a064cSDimitry Andric }
117ac9a064cSDimitry Andric }
118ac9a064cSDimitry Andric
findProfileAnchors(const FunctionSamples & FS,AnchorMap & ProfileAnchors) const119ac9a064cSDimitry Andric void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
120ac9a064cSDimitry Andric AnchorMap &ProfileAnchors) const {
121ac9a064cSDimitry Andric auto isInvalidLineOffset = [](uint32_t LineOffset) {
122ac9a064cSDimitry Andric return LineOffset & 0x8000;
123ac9a064cSDimitry Andric };
124ac9a064cSDimitry Andric
125ac9a064cSDimitry Andric auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
126ac9a064cSDimitry Andric AnchorMap &ProfileAnchors) {
127ac9a064cSDimitry Andric auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
128ac9a064cSDimitry Andric if (!Ret.second) {
129ac9a064cSDimitry Andric // For multiple callees, which indicates it's an indirect call, we use a
130ac9a064cSDimitry Andric // dummy name(UnknownIndirectCallee) as the indrect callee name.
131ac9a064cSDimitry Andric Ret.first->second = FunctionId(UnknownIndirectCallee);
132ac9a064cSDimitry Andric }
133ac9a064cSDimitry Andric };
134ac9a064cSDimitry Andric
135ac9a064cSDimitry Andric for (const auto &I : FS.getBodySamples()) {
136ac9a064cSDimitry Andric const LineLocation &Loc = I.first;
137ac9a064cSDimitry Andric if (isInvalidLineOffset(Loc.LineOffset))
138ac9a064cSDimitry Andric continue;
139ac9a064cSDimitry Andric for (const auto &C : I.second.getCallTargets())
140ac9a064cSDimitry Andric InsertAnchor(Loc, C.first, ProfileAnchors);
141ac9a064cSDimitry Andric }
142ac9a064cSDimitry Andric
143ac9a064cSDimitry Andric for (const auto &I : FS.getCallsiteSamples()) {
144ac9a064cSDimitry Andric const LineLocation &Loc = I.first;
145ac9a064cSDimitry Andric if (isInvalidLineOffset(Loc.LineOffset))
146ac9a064cSDimitry Andric continue;
147ac9a064cSDimitry Andric for (const auto &C : I.second)
148ac9a064cSDimitry Andric InsertAnchor(Loc, C.first, ProfileAnchors);
149ac9a064cSDimitry Andric }
150ac9a064cSDimitry Andric }
151ac9a064cSDimitry Andric
functionHasProfile(const FunctionId & IRFuncName,Function * & FuncWithoutProfile)152ac9a064cSDimitry Andric bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
153ac9a064cSDimitry Andric Function *&FuncWithoutProfile) {
154ac9a064cSDimitry Andric FuncWithoutProfile = nullptr;
155ac9a064cSDimitry Andric auto R = FunctionsWithoutProfile.find(IRFuncName);
156ac9a064cSDimitry Andric if (R != FunctionsWithoutProfile.end())
157ac9a064cSDimitry Andric FuncWithoutProfile = R->second;
158ac9a064cSDimitry Andric return !FuncWithoutProfile;
159ac9a064cSDimitry Andric }
160ac9a064cSDimitry Andric
isProfileUnused(const FunctionId & ProfileFuncName)161ac9a064cSDimitry Andric bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
162ac9a064cSDimitry Andric return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
163ac9a064cSDimitry Andric }
164ac9a064cSDimitry Andric
functionMatchesProfile(const FunctionId & IRFuncName,const FunctionId & ProfileFuncName,bool FindMatchedProfileOnly)165ac9a064cSDimitry Andric bool SampleProfileMatcher::functionMatchesProfile(
166ac9a064cSDimitry Andric const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
167ac9a064cSDimitry Andric bool FindMatchedProfileOnly) {
168ac9a064cSDimitry Andric if (IRFuncName == ProfileFuncName)
169ac9a064cSDimitry Andric return true;
170ac9a064cSDimitry Andric if (!SalvageUnusedProfile)
171ac9a064cSDimitry Andric return false;
172ac9a064cSDimitry Andric
173ac9a064cSDimitry Andric // If IR function doesn't have profile and the profile is unused, try
174ac9a064cSDimitry Andric // matching them.
175ac9a064cSDimitry Andric Function *IRFunc = nullptr;
176ac9a064cSDimitry Andric if (functionHasProfile(IRFuncName, IRFunc) ||
177ac9a064cSDimitry Andric !isProfileUnused(ProfileFuncName))
178ac9a064cSDimitry Andric return false;
179ac9a064cSDimitry Andric
180ac9a064cSDimitry Andric assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
181ac9a064cSDimitry Andric "IR function should be different from profile function to match");
182ac9a064cSDimitry Andric return functionMatchesProfile(*IRFunc, ProfileFuncName,
183ac9a064cSDimitry Andric FindMatchedProfileOnly);
184ac9a064cSDimitry Andric }
185ac9a064cSDimitry Andric
186ac9a064cSDimitry Andric LocToLocMap
longestCommonSequence(const AnchorList & AnchorList1,const AnchorList & AnchorList2,bool MatchUnusedFunction)187ac9a064cSDimitry Andric SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
188ac9a064cSDimitry Andric const AnchorList &AnchorList2,
189ac9a064cSDimitry Andric bool MatchUnusedFunction) {
190ac9a064cSDimitry Andric int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
191ac9a064cSDimitry Andric MaxDepth = Size1 + Size2;
192ac9a064cSDimitry Andric auto Index = [&](int32_t I) { return I + MaxDepth; };
193ac9a064cSDimitry Andric
194ac9a064cSDimitry Andric LocToLocMap EqualLocations;
195ac9a064cSDimitry Andric if (MaxDepth == 0)
196ac9a064cSDimitry Andric return EqualLocations;
197ac9a064cSDimitry Andric
198ac9a064cSDimitry Andric // Backtrack the SES result.
199ac9a064cSDimitry Andric auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
200ac9a064cSDimitry Andric const AnchorList &AnchorList1,
201ac9a064cSDimitry Andric const AnchorList &AnchorList2,
202ac9a064cSDimitry Andric LocToLocMap &EqualLocations) {
203ac9a064cSDimitry Andric int32_t X = Size1, Y = Size2;
204ac9a064cSDimitry Andric for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
205ac9a064cSDimitry Andric const auto &P = Trace[Depth];
206ac9a064cSDimitry Andric int32_t K = X - Y;
207ac9a064cSDimitry Andric int32_t PrevK = K;
208ac9a064cSDimitry Andric if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
209ac9a064cSDimitry Andric PrevK = K + 1;
210ac9a064cSDimitry Andric else
211ac9a064cSDimitry Andric PrevK = K - 1;
212ac9a064cSDimitry Andric
213ac9a064cSDimitry Andric int32_t PrevX = P[Index(PrevK)];
214ac9a064cSDimitry Andric int32_t PrevY = PrevX - PrevK;
215ac9a064cSDimitry Andric while (X > PrevX && Y > PrevY) {
216ac9a064cSDimitry Andric X--;
217ac9a064cSDimitry Andric Y--;
218ac9a064cSDimitry Andric EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
219ac9a064cSDimitry Andric }
220ac9a064cSDimitry Andric
221ac9a064cSDimitry Andric if (Depth == 0)
222ac9a064cSDimitry Andric break;
223ac9a064cSDimitry Andric
224ac9a064cSDimitry Andric if (Y == PrevY)
225ac9a064cSDimitry Andric X--;
226ac9a064cSDimitry Andric else if (X == PrevX)
227ac9a064cSDimitry Andric Y--;
228ac9a064cSDimitry Andric X = PrevX;
229ac9a064cSDimitry Andric Y = PrevY;
230ac9a064cSDimitry Andric }
231ac9a064cSDimitry Andric };
232ac9a064cSDimitry Andric
233ac9a064cSDimitry Andric // The greedy LCS/SES algorithm.
234ac9a064cSDimitry Andric
235ac9a064cSDimitry Andric // An array contains the endpoints of the furthest reaching D-paths.
236ac9a064cSDimitry Andric std::vector<int32_t> V(2 * MaxDepth + 1, -1);
237ac9a064cSDimitry Andric V[Index(1)] = 0;
238ac9a064cSDimitry Andric // Trace is used to backtrack the SES result.
239ac9a064cSDimitry Andric std::vector<std::vector<int32_t>> Trace;
240ac9a064cSDimitry Andric for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
241ac9a064cSDimitry Andric Trace.push_back(V);
242ac9a064cSDimitry Andric for (int32_t K = -Depth; K <= Depth; K += 2) {
243ac9a064cSDimitry Andric int32_t X = 0, Y = 0;
244ac9a064cSDimitry Andric if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
245ac9a064cSDimitry Andric X = V[Index(K + 1)];
246ac9a064cSDimitry Andric else
247ac9a064cSDimitry Andric X = V[Index(K - 1)] + 1;
248ac9a064cSDimitry Andric Y = X - K;
249ac9a064cSDimitry Andric while (X < Size1 && Y < Size2 &&
250ac9a064cSDimitry Andric functionMatchesProfile(
251ac9a064cSDimitry Andric AnchorList1[X].second, AnchorList2[Y].second,
252ac9a064cSDimitry Andric !MatchUnusedFunction /* Find matched function only */))
253ac9a064cSDimitry Andric X++, Y++;
254ac9a064cSDimitry Andric
255ac9a064cSDimitry Andric V[Index(K)] = X;
256ac9a064cSDimitry Andric
257ac9a064cSDimitry Andric if (X >= Size1 && Y >= Size2) {
258ac9a064cSDimitry Andric // Length of an SES is D.
259ac9a064cSDimitry Andric Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
260ac9a064cSDimitry Andric return EqualLocations;
261ac9a064cSDimitry Andric }
262ac9a064cSDimitry Andric }
263ac9a064cSDimitry Andric }
264ac9a064cSDimitry Andric // Length of an SES is greater than MaxDepth.
265ac9a064cSDimitry Andric return EqualLocations;
266ac9a064cSDimitry Andric }
267ac9a064cSDimitry Andric
matchNonCallsiteLocs(const LocToLocMap & MatchedAnchors,const AnchorMap & IRAnchors,LocToLocMap & IRToProfileLocationMap)268ac9a064cSDimitry Andric void SampleProfileMatcher::matchNonCallsiteLocs(
269ac9a064cSDimitry Andric const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
270ac9a064cSDimitry Andric LocToLocMap &IRToProfileLocationMap) {
271ac9a064cSDimitry Andric auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
272ac9a064cSDimitry Andric // Skip the unchanged location mapping to save memory.
273ac9a064cSDimitry Andric if (From != To)
274ac9a064cSDimitry Andric IRToProfileLocationMap.insert({From, To});
275ac9a064cSDimitry Andric };
276ac9a064cSDimitry Andric
277ac9a064cSDimitry Andric // Use function's beginning location as the initial anchor.
278ac9a064cSDimitry Andric int32_t LocationDelta = 0;
279ac9a064cSDimitry Andric SmallVector<LineLocation> LastMatchedNonAnchors;
280ac9a064cSDimitry Andric for (const auto &IR : IRAnchors) {
281ac9a064cSDimitry Andric const auto &Loc = IR.first;
282ac9a064cSDimitry Andric bool IsMatchedAnchor = false;
283ac9a064cSDimitry Andric // Match the anchor location in lexical order.
284ac9a064cSDimitry Andric auto R = MatchedAnchors.find(Loc);
285ac9a064cSDimitry Andric if (R != MatchedAnchors.end()) {
286ac9a064cSDimitry Andric const auto &Candidate = R->second;
287ac9a064cSDimitry Andric InsertMatching(Loc, Candidate);
288ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
289ac9a064cSDimitry Andric << " is matched from " << Loc << " to " << Candidate
290ac9a064cSDimitry Andric << "\n");
291ac9a064cSDimitry Andric LocationDelta = Candidate.LineOffset - Loc.LineOffset;
292ac9a064cSDimitry Andric
293ac9a064cSDimitry Andric // Match backwards for non-anchor locations.
294ac9a064cSDimitry Andric // The locations in LastMatchedNonAnchors have been matched forwards
295ac9a064cSDimitry Andric // based on the previous anchor, spilt it evenly and overwrite the
296ac9a064cSDimitry Andric // second half based on the current anchor.
297ac9a064cSDimitry Andric for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
298ac9a064cSDimitry Andric I < LastMatchedNonAnchors.size(); I++) {
299ac9a064cSDimitry Andric const auto &L = LastMatchedNonAnchors[I];
300ac9a064cSDimitry Andric uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
301ac9a064cSDimitry Andric LineLocation Candidate(CandidateLineOffset, L.Discriminator);
302ac9a064cSDimitry Andric InsertMatching(L, Candidate);
303ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
304ac9a064cSDimitry Andric << " to " << Candidate << "\n");
305ac9a064cSDimitry Andric }
306ac9a064cSDimitry Andric
307ac9a064cSDimitry Andric IsMatchedAnchor = true;
308ac9a064cSDimitry Andric LastMatchedNonAnchors.clear();
309ac9a064cSDimitry Andric }
310ac9a064cSDimitry Andric
311ac9a064cSDimitry Andric // Match forwards for non-anchor locations.
312ac9a064cSDimitry Andric if (!IsMatchedAnchor) {
313ac9a064cSDimitry Andric uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
314ac9a064cSDimitry Andric LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
315ac9a064cSDimitry Andric InsertMatching(Loc, Candidate);
316ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
317ac9a064cSDimitry Andric << Candidate << "\n");
318ac9a064cSDimitry Andric LastMatchedNonAnchors.emplace_back(Loc);
319ac9a064cSDimitry Andric }
320ac9a064cSDimitry Andric }
321ac9a064cSDimitry Andric }
322ac9a064cSDimitry Andric
323ac9a064cSDimitry Andric // Filter the non-call locations from IRAnchors and ProfileAnchors and write
324ac9a064cSDimitry Andric // them into a list for random access later.
getFilteredAnchorList(const AnchorMap & IRAnchors,const AnchorMap & ProfileAnchors,AnchorList & FilteredIRAnchorsList,AnchorList & FilteredProfileAnchorList)325ac9a064cSDimitry Andric void SampleProfileMatcher::getFilteredAnchorList(
326ac9a064cSDimitry Andric const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
327ac9a064cSDimitry Andric AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
328ac9a064cSDimitry Andric for (const auto &I : IRAnchors) {
329ac9a064cSDimitry Andric if (I.second.stringRef().empty())
330ac9a064cSDimitry Andric continue;
331ac9a064cSDimitry Andric FilteredIRAnchorsList.emplace_back(I);
332ac9a064cSDimitry Andric }
333ac9a064cSDimitry Andric
334ac9a064cSDimitry Andric for (const auto &I : ProfileAnchors)
335ac9a064cSDimitry Andric FilteredProfileAnchorList.emplace_back(I);
336ac9a064cSDimitry Andric }
337ac9a064cSDimitry Andric
338ac9a064cSDimitry Andric // Call target name anchor based profile fuzzy matching.
339ac9a064cSDimitry Andric // Input:
340ac9a064cSDimitry Andric // For IR locations, the anchor is the callee name of direct callsite; For
341ac9a064cSDimitry Andric // profile locations, it's the call target name for BodySamples or inlinee's
342ac9a064cSDimitry Andric // profile name for CallsiteSamples.
343ac9a064cSDimitry Andric // Matching heuristic:
344ac9a064cSDimitry Andric // First match all the anchors using the diff algorithm, then split the
345ac9a064cSDimitry Andric // non-anchor locations between the two anchors evenly, first half are matched
346ac9a064cSDimitry Andric // based on the start anchor, second half are matched based on the end anchor.
347ac9a064cSDimitry Andric // For example, given:
348ac9a064cSDimitry Andric // IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
349ac9a064cSDimitry Andric // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
350ac9a064cSDimitry Andric // The matching gives:
351ac9a064cSDimitry Andric // [1, 2(foo), 3, 5, 6(bar), 7]
352ac9a064cSDimitry Andric // | | | | | |
353ac9a064cSDimitry Andric // [1, 2, 3(foo), 4, 7, 8(bar), 9]
354ac9a064cSDimitry Andric // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
runStaleProfileMatching(const Function & F,const AnchorMap & IRAnchors,const AnchorMap & ProfileAnchors,LocToLocMap & IRToProfileLocationMap,bool RunCFGMatching,bool RunCGMatching)355ac9a064cSDimitry Andric void SampleProfileMatcher::runStaleProfileMatching(
356ac9a064cSDimitry Andric const Function &F, const AnchorMap &IRAnchors,
357ac9a064cSDimitry Andric const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
358ac9a064cSDimitry Andric bool RunCFGMatching, bool RunCGMatching) {
359ac9a064cSDimitry Andric if (!RunCFGMatching && !RunCGMatching)
360ac9a064cSDimitry Andric return;
361ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
362ac9a064cSDimitry Andric << "\n");
363ac9a064cSDimitry Andric assert(IRToProfileLocationMap.empty() &&
364ac9a064cSDimitry Andric "Run stale profile matching only once per function");
365ac9a064cSDimitry Andric
366ac9a064cSDimitry Andric AnchorList FilteredProfileAnchorList;
367ac9a064cSDimitry Andric AnchorList FilteredIRAnchorsList;
368ac9a064cSDimitry Andric getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
369ac9a064cSDimitry Andric FilteredProfileAnchorList);
370ac9a064cSDimitry Andric
371ac9a064cSDimitry Andric if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
372ac9a064cSDimitry Andric return;
373ac9a064cSDimitry Andric
374ac9a064cSDimitry Andric if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
375ac9a064cSDimitry Andric FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
376ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
377ac9a064cSDimitry Andric << " because the number of callsites in the IR is "
378ac9a064cSDimitry Andric << FilteredIRAnchorsList.size()
379ac9a064cSDimitry Andric << " and in the profile is "
380ac9a064cSDimitry Andric << FilteredProfileAnchorList.size() << "\n");
381ac9a064cSDimitry Andric return;
382ac9a064cSDimitry Andric }
383ac9a064cSDimitry Andric
384ac9a064cSDimitry Andric // Match the callsite anchors by finding the longest common subsequence
385ac9a064cSDimitry Andric // between IR and profile.
386ac9a064cSDimitry Andric // Define a match between two anchors as follows:
387ac9a064cSDimitry Andric // 1) The function names of anchors are the same.
388ac9a064cSDimitry Andric // 2) The similarity between the anchor functions is above a threshold if
389ac9a064cSDimitry Andric // RunCGMatching is set.
390ac9a064cSDimitry Andric // For 2), we only consider the anchor functions from IR and profile don't
391ac9a064cSDimitry Andric // appear on either side to reduce the matching scope. Note that we need to
392ac9a064cSDimitry Andric // use IR anchor as base(A side) to align with the order of
393ac9a064cSDimitry Andric // IRToProfileLocationMap.
394ac9a064cSDimitry Andric LocToLocMap MatchedAnchors =
395ac9a064cSDimitry Andric longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
396ac9a064cSDimitry Andric RunCGMatching /* Match unused functions */);
397ac9a064cSDimitry Andric
398ac9a064cSDimitry Andric // CFG level matching:
399ac9a064cSDimitry Andric // Apply the callsite matchings to infer matching for the basic
400ac9a064cSDimitry Andric // block(non-callsite) locations and write the result to
401ac9a064cSDimitry Andric // IRToProfileLocationMap.
402ac9a064cSDimitry Andric if (RunCFGMatching)
403ac9a064cSDimitry Andric matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
404ac9a064cSDimitry Andric }
405ac9a064cSDimitry Andric
runOnFunction(Function & F)406ac9a064cSDimitry Andric void SampleProfileMatcher::runOnFunction(Function &F) {
407ac9a064cSDimitry Andric // We need to use flattened function samples for matching.
408ac9a064cSDimitry Andric // Unlike IR, which includes all callsites from the source code, the callsites
409ac9a064cSDimitry Andric // in profile only show up when they are hit by samples, i,e. the profile
410ac9a064cSDimitry Andric // callsites in one context may differ from those in another context. To get
411ac9a064cSDimitry Andric // the maximum number of callsites, we merge the function profiles from all
412ac9a064cSDimitry Andric // contexts, aka, the flattened profile to find profile anchors.
413ac9a064cSDimitry Andric const auto *FSFlattened = getFlattenedSamplesFor(F);
414ac9a064cSDimitry Andric if (SalvageUnusedProfile && !FSFlattened) {
415ac9a064cSDimitry Andric // Apply the matching in place to find the new function's matched profile.
416ac9a064cSDimitry Andric // TODO: For extended profile format, if a function profile is unused and
417ac9a064cSDimitry Andric // it's top-level, even if the profile is matched, it's not found in the
418ac9a064cSDimitry Andric // profile. This is because sample reader only read the used profile at the
419ac9a064cSDimitry Andric // beginning, we need to support loading the profile on-demand in future.
420ac9a064cSDimitry Andric auto R = FuncToProfileNameMap.find(&F);
421ac9a064cSDimitry Andric if (R != FuncToProfileNameMap.end())
422ac9a064cSDimitry Andric FSFlattened = getFlattenedSamplesFor(R->second);
423ac9a064cSDimitry Andric }
424ac9a064cSDimitry Andric if (!FSFlattened)
425ac9a064cSDimitry Andric return;
426ac9a064cSDimitry Andric
427ac9a064cSDimitry Andric // Anchors for IR. It's a map from IR location to callee name, callee name is
428ac9a064cSDimitry Andric // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
429ac9a064cSDimitry Andric // for unknown indrect callee name.
430ac9a064cSDimitry Andric AnchorMap IRAnchors;
431ac9a064cSDimitry Andric findIRAnchors(F, IRAnchors);
432ac9a064cSDimitry Andric // Anchors for profile. It's a map from callsite location to a set of callee
433ac9a064cSDimitry Andric // name.
434ac9a064cSDimitry Andric AnchorMap ProfileAnchors;
435ac9a064cSDimitry Andric findProfileAnchors(*FSFlattened, ProfileAnchors);
436ac9a064cSDimitry Andric
437ac9a064cSDimitry Andric // Compute the callsite match states for profile staleness report.
438ac9a064cSDimitry Andric if (ReportProfileStaleness || PersistProfileStaleness)
439ac9a064cSDimitry Andric recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
440ac9a064cSDimitry Andric
441ac9a064cSDimitry Andric if (!SalvageStaleProfile)
442ac9a064cSDimitry Andric return;
443ac9a064cSDimitry Andric // For probe-based profiles, run matching only when profile checksum is
444ac9a064cSDimitry Andric // mismatched.
445ac9a064cSDimitry Andric bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
446ac9a064cSDimitry Andric !ProbeManager->profileIsValid(F, *FSFlattened);
447ac9a064cSDimitry Andric bool RunCFGMatching =
448ac9a064cSDimitry Andric !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
449ac9a064cSDimitry Andric bool RunCGMatching = SalvageUnusedProfile;
450ac9a064cSDimitry Andric // For imported functions, the checksum metadata(pseudo_probe_desc) are
451ac9a064cSDimitry Andric // dropped, so we leverage function attribute(profile-checksum-mismatch) to
452ac9a064cSDimitry Andric // transfer the info: add the attribute during pre-link phase and check it
453ac9a064cSDimitry Andric // during post-link phase(see "profileIsValid").
454ac9a064cSDimitry Andric if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
455ac9a064cSDimitry Andric F.addFnAttr("profile-checksum-mismatch");
456ac9a064cSDimitry Andric
457ac9a064cSDimitry Andric // The matching result will be saved to IRToProfileLocationMap, create a
458ac9a064cSDimitry Andric // new map for each function.
459ac9a064cSDimitry Andric auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
460ac9a064cSDimitry Andric runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
461ac9a064cSDimitry Andric RunCFGMatching, RunCGMatching);
462ac9a064cSDimitry Andric // Find and update callsite match states after matching.
463ac9a064cSDimitry Andric if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
464ac9a064cSDimitry Andric recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
465ac9a064cSDimitry Andric &IRToProfileLocationMap);
466ac9a064cSDimitry Andric }
467ac9a064cSDimitry Andric
recordCallsiteMatchStates(const Function & F,const AnchorMap & IRAnchors,const AnchorMap & ProfileAnchors,const LocToLocMap * IRToProfileLocationMap)468ac9a064cSDimitry Andric void SampleProfileMatcher::recordCallsiteMatchStates(
469ac9a064cSDimitry Andric const Function &F, const AnchorMap &IRAnchors,
470ac9a064cSDimitry Andric const AnchorMap &ProfileAnchors,
471ac9a064cSDimitry Andric const LocToLocMap *IRToProfileLocationMap) {
472ac9a064cSDimitry Andric bool IsPostMatch = IRToProfileLocationMap != nullptr;
473ac9a064cSDimitry Andric auto &CallsiteMatchStates =
474ac9a064cSDimitry Andric FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
475ac9a064cSDimitry Andric
476ac9a064cSDimitry Andric auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
477ac9a064cSDimitry Andric // IRToProfileLocationMap is null in pre-match phrase.
478ac9a064cSDimitry Andric if (!IRToProfileLocationMap)
479ac9a064cSDimitry Andric return IRLoc;
480ac9a064cSDimitry Andric const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
481ac9a064cSDimitry Andric if (ProfileLoc != IRToProfileLocationMap->end())
482ac9a064cSDimitry Andric return ProfileLoc->second;
483ac9a064cSDimitry Andric else
484ac9a064cSDimitry Andric return IRLoc;
485ac9a064cSDimitry Andric };
486ac9a064cSDimitry Andric
487ac9a064cSDimitry Andric for (const auto &I : IRAnchors) {
488ac9a064cSDimitry Andric // After fuzzy profile matching, use the matching result to remap the
489ac9a064cSDimitry Andric // current IR callsite.
490ac9a064cSDimitry Andric const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
491ac9a064cSDimitry Andric const auto &IRCalleeId = I.second;
492ac9a064cSDimitry Andric const auto &It = ProfileAnchors.find(ProfileLoc);
493ac9a064cSDimitry Andric if (It == ProfileAnchors.end())
494ac9a064cSDimitry Andric continue;
495ac9a064cSDimitry Andric const auto &ProfCalleeId = It->second;
496ac9a064cSDimitry Andric if (IRCalleeId == ProfCalleeId) {
497ac9a064cSDimitry Andric auto It = CallsiteMatchStates.find(ProfileLoc);
498ac9a064cSDimitry Andric if (It == CallsiteMatchStates.end())
499ac9a064cSDimitry Andric CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
500ac9a064cSDimitry Andric else if (IsPostMatch) {
501ac9a064cSDimitry Andric if (It->second == MatchState::InitialMatch)
502ac9a064cSDimitry Andric It->second = MatchState::UnchangedMatch;
503ac9a064cSDimitry Andric else if (It->second == MatchState::InitialMismatch)
504ac9a064cSDimitry Andric It->second = MatchState::RecoveredMismatch;
505ac9a064cSDimitry Andric }
506ac9a064cSDimitry Andric }
507ac9a064cSDimitry Andric }
508ac9a064cSDimitry Andric
509ac9a064cSDimitry Andric // Check if there are any callsites in the profile that does not match to any
510ac9a064cSDimitry Andric // IR callsites.
511ac9a064cSDimitry Andric for (const auto &I : ProfileAnchors) {
512ac9a064cSDimitry Andric const auto &Loc = I.first;
513ac9a064cSDimitry Andric assert(!I.second.stringRef().empty() && "Callees should not be empty");
514ac9a064cSDimitry Andric auto It = CallsiteMatchStates.find(Loc);
515ac9a064cSDimitry Andric if (It == CallsiteMatchStates.end())
516ac9a064cSDimitry Andric CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
517ac9a064cSDimitry Andric else if (IsPostMatch) {
518ac9a064cSDimitry Andric // Update the state if it's not matched(UnchangedMatch or
519ac9a064cSDimitry Andric // RecoveredMismatch).
520ac9a064cSDimitry Andric if (It->second == MatchState::InitialMismatch)
521ac9a064cSDimitry Andric It->second = MatchState::UnchangedMismatch;
522ac9a064cSDimitry Andric else if (It->second == MatchState::InitialMatch)
523ac9a064cSDimitry Andric It->second = MatchState::RemovedMatch;
524ac9a064cSDimitry Andric }
525ac9a064cSDimitry Andric }
526ac9a064cSDimitry Andric }
527ac9a064cSDimitry Andric
countMismatchedFuncSamples(const FunctionSamples & FS,bool IsTopLevel)528ac9a064cSDimitry Andric void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
529ac9a064cSDimitry Andric bool IsTopLevel) {
530ac9a064cSDimitry Andric const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
531ac9a064cSDimitry Andric // Skip the function that is external or renamed.
532ac9a064cSDimitry Andric if (!FuncDesc)
533ac9a064cSDimitry Andric return;
534ac9a064cSDimitry Andric
535ac9a064cSDimitry Andric if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
536ac9a064cSDimitry Andric if (IsTopLevel)
537ac9a064cSDimitry Andric NumStaleProfileFunc++;
538ac9a064cSDimitry Andric // Given currently all probe ids are after block probe ids, once the
539ac9a064cSDimitry Andric // checksum is mismatched, it's likely all the callites are mismatched and
540ac9a064cSDimitry Andric // dropped. We conservatively count all the samples as mismatched and stop
541ac9a064cSDimitry Andric // counting the inlinees' profiles.
542ac9a064cSDimitry Andric MismatchedFunctionSamples += FS.getTotalSamples();
543ac9a064cSDimitry Andric return;
544ac9a064cSDimitry Andric }
545ac9a064cSDimitry Andric
546ac9a064cSDimitry Andric // Even the current-level function checksum is matched, it's possible that the
547ac9a064cSDimitry Andric // nested inlinees' checksums are mismatched that affect the inlinee's sample
548ac9a064cSDimitry Andric // loading, we need to go deeper to check the inlinees' function samples.
549ac9a064cSDimitry Andric // Similarly, count all the samples as mismatched if the inlinee's checksum is
550ac9a064cSDimitry Andric // mismatched using this recursive function.
551ac9a064cSDimitry Andric for (const auto &I : FS.getCallsiteSamples())
552ac9a064cSDimitry Andric for (const auto &CS : I.second)
553ac9a064cSDimitry Andric countMismatchedFuncSamples(CS.second, false);
554ac9a064cSDimitry Andric }
555ac9a064cSDimitry Andric
countMismatchedCallsiteSamples(const FunctionSamples & FS)556ac9a064cSDimitry Andric void SampleProfileMatcher::countMismatchedCallsiteSamples(
557ac9a064cSDimitry Andric const FunctionSamples &FS) {
558ac9a064cSDimitry Andric auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
559ac9a064cSDimitry Andric // Skip it if no mismatched callsite or this is an external function.
560ac9a064cSDimitry Andric if (It == FuncCallsiteMatchStates.end() || It->second.empty())
561ac9a064cSDimitry Andric return;
562ac9a064cSDimitry Andric const auto &CallsiteMatchStates = It->second;
563ac9a064cSDimitry Andric
564ac9a064cSDimitry Andric auto findMatchState = [&](const LineLocation &Loc) {
565ac9a064cSDimitry Andric auto It = CallsiteMatchStates.find(Loc);
566ac9a064cSDimitry Andric if (It == CallsiteMatchStates.end())
567ac9a064cSDimitry Andric return MatchState::Unknown;
568ac9a064cSDimitry Andric return It->second;
569ac9a064cSDimitry Andric };
570ac9a064cSDimitry Andric
571ac9a064cSDimitry Andric auto AttributeMismatchedSamples = [&](const enum MatchState &State,
572ac9a064cSDimitry Andric uint64_t Samples) {
573ac9a064cSDimitry Andric if (isMismatchState(State))
574ac9a064cSDimitry Andric MismatchedCallsiteSamples += Samples;
575ac9a064cSDimitry Andric else if (State == MatchState::RecoveredMismatch)
576ac9a064cSDimitry Andric RecoveredCallsiteSamples += Samples;
577ac9a064cSDimitry Andric };
578ac9a064cSDimitry Andric
579ac9a064cSDimitry Andric // The non-inlined callsites are saved in the body samples of function
580ac9a064cSDimitry Andric // profile, go through it to count the non-inlined callsite samples.
581ac9a064cSDimitry Andric for (const auto &I : FS.getBodySamples())
582ac9a064cSDimitry Andric AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
583ac9a064cSDimitry Andric
584ac9a064cSDimitry Andric // Count the inlined callsite samples.
585ac9a064cSDimitry Andric for (const auto &I : FS.getCallsiteSamples()) {
586ac9a064cSDimitry Andric auto State = findMatchState(I.first);
587ac9a064cSDimitry Andric uint64_t CallsiteSamples = 0;
588ac9a064cSDimitry Andric for (const auto &CS : I.second)
589ac9a064cSDimitry Andric CallsiteSamples += CS.second.getTotalSamples();
590ac9a064cSDimitry Andric AttributeMismatchedSamples(State, CallsiteSamples);
591ac9a064cSDimitry Andric
592ac9a064cSDimitry Andric if (isMismatchState(State))
593ac9a064cSDimitry Andric continue;
594ac9a064cSDimitry Andric
595ac9a064cSDimitry Andric // When the current level of inlined call site matches the profiled call
596ac9a064cSDimitry Andric // site, we need to go deeper along the inline tree to count mismatches from
597ac9a064cSDimitry Andric // lower level inlinees.
598ac9a064cSDimitry Andric for (const auto &CS : I.second)
599ac9a064cSDimitry Andric countMismatchedCallsiteSamples(CS.second);
600ac9a064cSDimitry Andric }
601ac9a064cSDimitry Andric }
602ac9a064cSDimitry Andric
countMismatchCallsites(const FunctionSamples & FS)603ac9a064cSDimitry Andric void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
604ac9a064cSDimitry Andric auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
605ac9a064cSDimitry Andric // Skip it if no mismatched callsite or this is an external function.
606ac9a064cSDimitry Andric if (It == FuncCallsiteMatchStates.end() || It->second.empty())
607ac9a064cSDimitry Andric return;
608ac9a064cSDimitry Andric const auto &MatchStates = It->second;
609ac9a064cSDimitry Andric [[maybe_unused]] bool OnInitialState =
610ac9a064cSDimitry Andric isInitialState(MatchStates.begin()->second);
611ac9a064cSDimitry Andric for (const auto &I : MatchStates) {
612ac9a064cSDimitry Andric TotalProfiledCallsites++;
613ac9a064cSDimitry Andric assert(
614ac9a064cSDimitry Andric (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
615ac9a064cSDimitry Andric "Profile matching state is inconsistent");
616ac9a064cSDimitry Andric
617ac9a064cSDimitry Andric if (isMismatchState(I.second))
618ac9a064cSDimitry Andric NumMismatchedCallsites++;
619ac9a064cSDimitry Andric else if (I.second == MatchState::RecoveredMismatch)
620ac9a064cSDimitry Andric NumRecoveredCallsites++;
621ac9a064cSDimitry Andric }
622ac9a064cSDimitry Andric }
623ac9a064cSDimitry Andric
countCallGraphRecoveredSamples(const FunctionSamples & FS,std::unordered_set<FunctionId> & CallGraphRecoveredProfiles)624ac9a064cSDimitry Andric void SampleProfileMatcher::countCallGraphRecoveredSamples(
625ac9a064cSDimitry Andric const FunctionSamples &FS,
626ac9a064cSDimitry Andric std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
627ac9a064cSDimitry Andric if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
628ac9a064cSDimitry Andric NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
629ac9a064cSDimitry Andric return;
630ac9a064cSDimitry Andric }
631ac9a064cSDimitry Andric
632ac9a064cSDimitry Andric for (const auto &CM : FS.getCallsiteSamples()) {
633ac9a064cSDimitry Andric for (const auto &CS : CM.second) {
634ac9a064cSDimitry Andric countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
635ac9a064cSDimitry Andric }
636ac9a064cSDimitry Andric }
637ac9a064cSDimitry Andric }
638ac9a064cSDimitry Andric
computeAndReportProfileStaleness()639ac9a064cSDimitry Andric void SampleProfileMatcher::computeAndReportProfileStaleness() {
640ac9a064cSDimitry Andric if (!ReportProfileStaleness && !PersistProfileStaleness)
641ac9a064cSDimitry Andric return;
642ac9a064cSDimitry Andric
643ac9a064cSDimitry Andric std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
644ac9a064cSDimitry Andric if (SalvageUnusedProfile) {
645ac9a064cSDimitry Andric for (const auto &I : FuncToProfileNameMap) {
646ac9a064cSDimitry Andric CallGraphRecoveredProfiles.insert(I.second);
647ac9a064cSDimitry Andric if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
648ac9a064cSDimitry Andric continue;
649ac9a064cSDimitry Andric NumCallGraphRecoveredProfiledFunc++;
650ac9a064cSDimitry Andric }
651ac9a064cSDimitry Andric }
652ac9a064cSDimitry Andric
653ac9a064cSDimitry Andric // Count profile mismatches for profile staleness report.
654ac9a064cSDimitry Andric for (const auto &F : M) {
655ac9a064cSDimitry Andric if (skipProfileForFunction(F))
656ac9a064cSDimitry Andric continue;
657ac9a064cSDimitry Andric // As the stats will be merged by linker, skip reporting the metrics for
658ac9a064cSDimitry Andric // imported functions to avoid repeated counting.
659ac9a064cSDimitry Andric if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
660ac9a064cSDimitry Andric continue;
661ac9a064cSDimitry Andric const auto *FS = Reader.getSamplesFor(F);
662ac9a064cSDimitry Andric if (!FS)
663ac9a064cSDimitry Andric continue;
664ac9a064cSDimitry Andric TotalProfiledFunc++;
665ac9a064cSDimitry Andric TotalFunctionSamples += FS->getTotalSamples();
666ac9a064cSDimitry Andric
667ac9a064cSDimitry Andric if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
668ac9a064cSDimitry Andric countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
669ac9a064cSDimitry Andric
670ac9a064cSDimitry Andric // Checksum mismatch is only used in pseudo-probe mode.
671ac9a064cSDimitry Andric if (FunctionSamples::ProfileIsProbeBased)
672ac9a064cSDimitry Andric countMismatchedFuncSamples(*FS, true);
673ac9a064cSDimitry Andric
674ac9a064cSDimitry Andric // Count mismatches and samples for calliste.
675ac9a064cSDimitry Andric countMismatchCallsites(*FS);
676ac9a064cSDimitry Andric countMismatchedCallsiteSamples(*FS);
677ac9a064cSDimitry Andric }
678ac9a064cSDimitry Andric
679ac9a064cSDimitry Andric if (ReportProfileStaleness) {
680ac9a064cSDimitry Andric if (FunctionSamples::ProfileIsProbeBased) {
681ac9a064cSDimitry Andric errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
682ac9a064cSDimitry Andric << ") of functions' profile are invalid and ("
683ac9a064cSDimitry Andric << MismatchedFunctionSamples << "/" << TotalFunctionSamples
684ac9a064cSDimitry Andric << ") of samples are discarded due to function hash mismatch.\n";
685ac9a064cSDimitry Andric }
686ac9a064cSDimitry Andric if (SalvageUnusedProfile) {
687ac9a064cSDimitry Andric errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
688ac9a064cSDimitry Andric << TotalProfiledFunc << ") of functions' profile are matched and ("
689ac9a064cSDimitry Andric << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
690ac9a064cSDimitry Andric << ") of samples are reused by call graph matching.\n";
691ac9a064cSDimitry Andric }
692ac9a064cSDimitry Andric
693ac9a064cSDimitry Andric errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
694ac9a064cSDimitry Andric << TotalProfiledCallsites
695ac9a064cSDimitry Andric << ") of callsites' profile are invalid and ("
696ac9a064cSDimitry Andric << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
697ac9a064cSDimitry Andric << TotalFunctionSamples
698ac9a064cSDimitry Andric << ") of samples are discarded due to callsite location mismatch.\n";
699ac9a064cSDimitry Andric errs() << "(" << NumRecoveredCallsites << "/"
700ac9a064cSDimitry Andric << (NumRecoveredCallsites + NumMismatchedCallsites)
701ac9a064cSDimitry Andric << ") of callsites and (" << RecoveredCallsiteSamples << "/"
702ac9a064cSDimitry Andric << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
703ac9a064cSDimitry Andric << ") of samples are recovered by stale profile matching.\n";
704ac9a064cSDimitry Andric }
705ac9a064cSDimitry Andric
706ac9a064cSDimitry Andric if (PersistProfileStaleness) {
707ac9a064cSDimitry Andric LLVMContext &Ctx = M.getContext();
708ac9a064cSDimitry Andric MDBuilder MDB(Ctx);
709ac9a064cSDimitry Andric
710ac9a064cSDimitry Andric SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
711ac9a064cSDimitry Andric if (FunctionSamples::ProfileIsProbeBased) {
712ac9a064cSDimitry Andric ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
713ac9a064cSDimitry Andric ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
714ac9a064cSDimitry Andric ProfStatsVec.emplace_back("MismatchedFunctionSamples",
715ac9a064cSDimitry Andric MismatchedFunctionSamples);
716ac9a064cSDimitry Andric ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
717ac9a064cSDimitry Andric }
718ac9a064cSDimitry Andric
719ac9a064cSDimitry Andric if (SalvageUnusedProfile) {
720ac9a064cSDimitry Andric ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
721ac9a064cSDimitry Andric NumCallGraphRecoveredProfiledFunc);
722ac9a064cSDimitry Andric ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
723ac9a064cSDimitry Andric NumCallGraphRecoveredFuncSamples);
724ac9a064cSDimitry Andric }
725ac9a064cSDimitry Andric
726ac9a064cSDimitry Andric ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
727ac9a064cSDimitry Andric ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
728ac9a064cSDimitry Andric ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
729ac9a064cSDimitry Andric ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
730ac9a064cSDimitry Andric MismatchedCallsiteSamples);
731ac9a064cSDimitry Andric ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
732ac9a064cSDimitry Andric RecoveredCallsiteSamples);
733ac9a064cSDimitry Andric
734ac9a064cSDimitry Andric auto *MD = MDB.createLLVMStats(ProfStatsVec);
735ac9a064cSDimitry Andric auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
736ac9a064cSDimitry Andric NMD->addOperand(MD);
737ac9a064cSDimitry Andric }
738ac9a064cSDimitry Andric }
739ac9a064cSDimitry Andric
findFunctionsWithoutProfile()740ac9a064cSDimitry Andric void SampleProfileMatcher::findFunctionsWithoutProfile() {
741ac9a064cSDimitry Andric // TODO: Support MD5 profile.
742ac9a064cSDimitry Andric if (FunctionSamples::UseMD5)
743ac9a064cSDimitry Andric return;
744ac9a064cSDimitry Andric StringSet<> NamesInProfile;
745ac9a064cSDimitry Andric if (auto NameTable = Reader.getNameTable()) {
746ac9a064cSDimitry Andric for (auto Name : *NameTable)
747ac9a064cSDimitry Andric NamesInProfile.insert(Name.stringRef());
748ac9a064cSDimitry Andric }
749ac9a064cSDimitry Andric
750ac9a064cSDimitry Andric for (auto &F : M) {
751ac9a064cSDimitry Andric // Skip declarations, as even if the function can be matched, we have
752ac9a064cSDimitry Andric // nothing to do with it.
753ac9a064cSDimitry Andric if (F.isDeclaration())
754ac9a064cSDimitry Andric continue;
755ac9a064cSDimitry Andric
756ac9a064cSDimitry Andric StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
757ac9a064cSDimitry Andric const auto *FS = getFlattenedSamplesFor(F);
758ac9a064cSDimitry Andric if (FS)
759ac9a064cSDimitry Andric continue;
760ac9a064cSDimitry Andric
761ac9a064cSDimitry Andric // For extended binary, functions fully inlined may not be loaded in the
762ac9a064cSDimitry Andric // top-level profile, so check the NameTable which has the all symbol names
763ac9a064cSDimitry Andric // in profile.
764ac9a064cSDimitry Andric if (NamesInProfile.count(CanonFName))
765ac9a064cSDimitry Andric continue;
766ac9a064cSDimitry Andric
767ac9a064cSDimitry Andric // For extended binary, non-profiled function symbols are in the profile
768ac9a064cSDimitry Andric // symbol list table.
769ac9a064cSDimitry Andric if (PSL && PSL->contains(CanonFName))
770ac9a064cSDimitry Andric continue;
771ac9a064cSDimitry Andric
772ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Function " << CanonFName
773ac9a064cSDimitry Andric << " is not in profile or profile symbol list.\n");
774ac9a064cSDimitry Andric FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
775ac9a064cSDimitry Andric }
776ac9a064cSDimitry Andric }
777ac9a064cSDimitry Andric
functionMatchesProfileHelper(const Function & IRFunc,const FunctionId & ProfFunc)778ac9a064cSDimitry Andric bool SampleProfileMatcher::functionMatchesProfileHelper(
779ac9a064cSDimitry Andric const Function &IRFunc, const FunctionId &ProfFunc) {
780ac9a064cSDimitry Andric // The value is in the range [0, 1]. The bigger the value is, the more similar
781ac9a064cSDimitry Andric // two sequences are.
782ac9a064cSDimitry Andric float Similarity = 0.0;
783ac9a064cSDimitry Andric
784ac9a064cSDimitry Andric const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
785ac9a064cSDimitry Andric if (!FSFlattened)
786ac9a064cSDimitry Andric return false;
787ac9a064cSDimitry Andric // The check for similarity or checksum may not be reliable if the function is
788ac9a064cSDimitry Andric // tiny, we use the number of basic block as a proxy for the function
789ac9a064cSDimitry Andric // complexity and skip the matching if it's too small.
790ac9a064cSDimitry Andric if (IRFunc.size() < MinFuncCountForCGMatching ||
791ac9a064cSDimitry Andric FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
792ac9a064cSDimitry Andric return false;
793ac9a064cSDimitry Andric
794ac9a064cSDimitry Andric // For probe-based function, we first trust the checksum info. If the checksum
795ac9a064cSDimitry Andric // doesn't match, we continue checking for similarity.
796ac9a064cSDimitry Andric if (FunctionSamples::ProfileIsProbeBased) {
797ac9a064cSDimitry Andric const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
798ac9a064cSDimitry Andric if (FuncDesc &&
799ac9a064cSDimitry Andric !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
800ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
801ac9a064cSDimitry Andric << "(IR) and " << ProfFunc << "(Profile) match.\n");
802ac9a064cSDimitry Andric
803ac9a064cSDimitry Andric return true;
804ac9a064cSDimitry Andric }
805ac9a064cSDimitry Andric }
806ac9a064cSDimitry Andric
807ac9a064cSDimitry Andric AnchorMap IRAnchors;
808ac9a064cSDimitry Andric findIRAnchors(IRFunc, IRAnchors);
809ac9a064cSDimitry Andric AnchorMap ProfileAnchors;
810ac9a064cSDimitry Andric findProfileAnchors(*FSFlattened, ProfileAnchors);
811ac9a064cSDimitry Andric
812ac9a064cSDimitry Andric AnchorList FilteredIRAnchorsList;
813ac9a064cSDimitry Andric AnchorList FilteredProfileAnchorList;
814ac9a064cSDimitry Andric getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
815ac9a064cSDimitry Andric FilteredProfileAnchorList);
816ac9a064cSDimitry Andric
817ac9a064cSDimitry Andric // Similarly skip the matching if the num of anchors is not enough.
818ac9a064cSDimitry Andric if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
819ac9a064cSDimitry Andric FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
820ac9a064cSDimitry Andric return false;
821ac9a064cSDimitry Andric
822ac9a064cSDimitry Andric // Use the diff algorithm to find the LCS between IR and profile.
823ac9a064cSDimitry Andric
824ac9a064cSDimitry Andric // Don't recursively match the callee function to avoid infinite matching,
825ac9a064cSDimitry Andric // callee functions will be handled later since it's processed in top-down
826ac9a064cSDimitry Andric // order .
827ac9a064cSDimitry Andric LocToLocMap MatchedAnchors =
828ac9a064cSDimitry Andric longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
829ac9a064cSDimitry Andric false /* Match unused functions */);
830ac9a064cSDimitry Andric
831ac9a064cSDimitry Andric Similarity =
832ac9a064cSDimitry Andric static_cast<float>(MatchedAnchors.size()) * 2 /
833ac9a064cSDimitry Andric (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
834ac9a064cSDimitry Andric
835ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
836ac9a064cSDimitry Andric << "(IR) and " << ProfFunc << "(profile) is "
837ac9a064cSDimitry Andric << format("%.2f", Similarity) << "\n");
838ac9a064cSDimitry Andric assert((Similarity >= 0 && Similarity <= 1.0) &&
839ac9a064cSDimitry Andric "Similarity value should be in [0, 1]");
840ac9a064cSDimitry Andric return Similarity * 100 > FuncProfileSimilarityThreshold;
841ac9a064cSDimitry Andric }
842ac9a064cSDimitry Andric
843ac9a064cSDimitry Andric // If FindMatchedProfileOnly is set to true, only use the processed function
844ac9a064cSDimitry Andric // results. This is used for skipping the repeated recursive matching.
functionMatchesProfile(Function & IRFunc,const FunctionId & ProfFunc,bool FindMatchedProfileOnly)845ac9a064cSDimitry Andric bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
846ac9a064cSDimitry Andric const FunctionId &ProfFunc,
847ac9a064cSDimitry Andric bool FindMatchedProfileOnly) {
848ac9a064cSDimitry Andric auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
849ac9a064cSDimitry Andric if (R != FuncProfileMatchCache.end())
850ac9a064cSDimitry Andric return R->second;
851ac9a064cSDimitry Andric
852ac9a064cSDimitry Andric if (FindMatchedProfileOnly)
853ac9a064cSDimitry Andric return false;
854ac9a064cSDimitry Andric
855ac9a064cSDimitry Andric bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
856ac9a064cSDimitry Andric FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
857ac9a064cSDimitry Andric if (Matched) {
858ac9a064cSDimitry Andric FuncToProfileNameMap[&IRFunc] = ProfFunc;
859ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
860ac9a064cSDimitry Andric << " matches profile:" << ProfFunc << "\n");
861ac9a064cSDimitry Andric }
862ac9a064cSDimitry Andric
863ac9a064cSDimitry Andric return Matched;
864ac9a064cSDimitry Andric }
865ac9a064cSDimitry Andric
runOnModule()866ac9a064cSDimitry Andric void SampleProfileMatcher::runOnModule() {
867ac9a064cSDimitry Andric ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
868ac9a064cSDimitry Andric FunctionSamples::ProfileIsCS);
869ac9a064cSDimitry Andric if (SalvageUnusedProfile)
870ac9a064cSDimitry Andric findFunctionsWithoutProfile();
871ac9a064cSDimitry Andric
872ac9a064cSDimitry Andric // Process the matching in top-down order so that the caller matching result
873ac9a064cSDimitry Andric // can be used to the callee matching.
874ac9a064cSDimitry Andric std::vector<Function *> TopDownFunctionList;
875ac9a064cSDimitry Andric TopDownFunctionList.reserve(M.size());
876ac9a064cSDimitry Andric buildTopDownFuncOrder(CG, TopDownFunctionList);
877ac9a064cSDimitry Andric for (auto *F : TopDownFunctionList) {
878ac9a064cSDimitry Andric if (skipProfileForFunction(*F))
879ac9a064cSDimitry Andric continue;
880ac9a064cSDimitry Andric runOnFunction(*F);
881ac9a064cSDimitry Andric }
882ac9a064cSDimitry Andric
883ac9a064cSDimitry Andric // Update the data in SampleLoader.
884ac9a064cSDimitry Andric if (SalvageUnusedProfile)
885ac9a064cSDimitry Andric for (auto &I : FuncToProfileNameMap) {
886ac9a064cSDimitry Andric assert(I.first && "New function is null");
887ac9a064cSDimitry Andric FunctionId FuncName(I.first->getName());
888ac9a064cSDimitry Andric FuncNameToProfNameMap->emplace(FuncName, I.second);
889ac9a064cSDimitry Andric // We need to remove the old entry to avoid duplicating the function
890ac9a064cSDimitry Andric // processing.
891ac9a064cSDimitry Andric SymbolMap->erase(FuncName);
892ac9a064cSDimitry Andric SymbolMap->emplace(I.second, I.first);
893ac9a064cSDimitry Andric }
894ac9a064cSDimitry Andric
895ac9a064cSDimitry Andric if (SalvageStaleProfile)
896ac9a064cSDimitry Andric distributeIRToProfileLocationMap();
897ac9a064cSDimitry Andric
898ac9a064cSDimitry Andric computeAndReportProfileStaleness();
899ac9a064cSDimitry Andric }
900ac9a064cSDimitry Andric
distributeIRToProfileLocationMap(FunctionSamples & FS)901ac9a064cSDimitry Andric void SampleProfileMatcher::distributeIRToProfileLocationMap(
902ac9a064cSDimitry Andric FunctionSamples &FS) {
903ac9a064cSDimitry Andric const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
904ac9a064cSDimitry Andric if (ProfileMappings != FuncMappings.end()) {
905ac9a064cSDimitry Andric FS.setIRToProfileLocationMap(&(ProfileMappings->second));
906ac9a064cSDimitry Andric }
907ac9a064cSDimitry Andric
908ac9a064cSDimitry Andric for (auto &Callees :
909ac9a064cSDimitry Andric const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
910ac9a064cSDimitry Andric for (auto &FS : Callees.second) {
911ac9a064cSDimitry Andric distributeIRToProfileLocationMap(FS.second);
912ac9a064cSDimitry Andric }
913ac9a064cSDimitry Andric }
914ac9a064cSDimitry Andric }
915ac9a064cSDimitry Andric
916ac9a064cSDimitry Andric // Use a central place to distribute the matching results. Outlined and inlined
917ac9a064cSDimitry Andric // profile with the function name will be set to the same pointer.
distributeIRToProfileLocationMap()918ac9a064cSDimitry Andric void SampleProfileMatcher::distributeIRToProfileLocationMap() {
919ac9a064cSDimitry Andric for (auto &I : Reader.getProfiles()) {
920ac9a064cSDimitry Andric distributeIRToProfileLocationMap(I.second);
921ac9a064cSDimitry Andric }
922ac9a064cSDimitry Andric }
923