// llvm-project
// 301 lines · 10.6 KB
1//===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the BinaryFunctionCallGraph class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinaryFunctionCallGraph.h"
14#include "bolt/Core/BinaryContext.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/Timer.h"
18#include <stack>
19
20#define DEBUG_TYPE "callgraph"
21
22using namespace llvm;
23
namespace opts {

// Options owned by other BOLT translation units; referenced here for
// timing instrumentation and debug-logging verbosity.
extern cl::opt<bool> TimeOpts;
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltCategory;

// -dump-cg=<file>: when given, buildCallGraph() writes the finished call
// graph to <file> in Graphviz DOT format.
static cl::opt<std::string>
    DumpCGDot("dump-cg", cl::desc("dump callgraph to the given file"),
              cl::cat(BoltCategory));

} // namespace opts
35
36namespace llvm {
37namespace bolt {
38
39CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
40uint32_t Size,
41uint64_t Samples) {
42NodeId Id = CallGraph::addNode(Size, Samples);
43assert(size_t(Id) == Funcs.size());
44Funcs.push_back(BF);
45FuncToNodeId[BF] = Id;
46assert(Funcs[Id] == BF);
47return Id;
48}
49
50std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
51NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
52"CG breakdown", "CG breakdown", opts::TimeOpts);
53std::deque<BinaryFunction *> TopologicalOrder;
54enum NodeStatus { NEW, VISITING, VISITED };
55std::vector<NodeStatus> NodeStatus(Funcs.size());
56std::stack<NodeId> Worklist;
57
58for (BinaryFunction *Func : Funcs) {
59auto It = FuncToNodeId.find(Func);
60assert(It != FuncToNodeId.end());
61const NodeId Id = It->second;
62Worklist.push(Id);
63NodeStatus[Id] = NEW;
64}
65
66while (!Worklist.empty()) {
67const NodeId FuncId = Worklist.top();
68Worklist.pop();
69
70if (NodeStatus[FuncId] == VISITED)
71continue;
72
73if (NodeStatus[FuncId] == VISITING) {
74TopologicalOrder.push_back(Funcs[FuncId]);
75NodeStatus[FuncId] = VISITED;
76continue;
77}
78
79assert(NodeStatus[FuncId] == NEW);
80NodeStatus[FuncId] = VISITING;
81Worklist.push(FuncId);
82for (const NodeId Callee : successors(FuncId)) {
83if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
84continue;
85Worklist.push(Callee);
86}
87}
88
89return TopologicalOrder;
90}
91
/// Build a call graph over all functions in \p BC.
///
/// \param Filter             predicate; functions for which it returns true
///                           are excluded (both as callers and as callees).
/// \param CgFromPerfData     if set, functions without a valid CFG profile
///                           fall back to their raw perf-data call sites.
/// \param IncludeSplitCalls  if unset, calls originating in split (cold)
///                           blocks are skipped.
/// \param UseFunctionHotSize for split functions, size nodes by the hot-part
///                           estimate instead of the whole-function estimate.
/// \param UseSplitHotSize    forwarded to estimateHotSize(); also controls
///                           which blocks advance the callsite Offset.
/// \param UseEdgeCounts      weight arcs by profiled counts when available;
///                           otherwise every callsite contributes weight 1.
/// \param IgnoreRecursiveCalls drop self-call arcs entirely.
BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
               bool IncludeSplitCalls, bool UseFunctionHotSize,
               bool UseSplitHotSize, bool UseEdgeCounts,
               bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
                      "CG breakdown", opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute function size
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitHotSize)
               : Function->estimateSize();
  };

  // Add call graph nodes.
  // Returns the existing node for Function, or creates one on first use.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split. This is
      // because emitFunctions will emit the hot part first in the order that is
      // computed by ReorderFunctions. The cold part will be emitted with the
      // rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero. This will keep these functions from appearing in the hot
      // section. This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block. The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples. Results from perfomance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
  // Statistics accumulated across all functions, reported at the end.
  uint64_t NotProcessed = 0;
  uint64_t TotalCallsites = 0;
  uint64_t NoProfileCallsites = 0;
  uint64_t NumFallbacks = 0;
  uint64_t RecursiveCallsites = 0;
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current basic block from the beginning of the function
    uint64_t Offset = 0;

    // Record one call arc SrcId -> DestSymbol's function at the current
    // Offset. Returns false when the call is dropped (unknown target,
    // ignored recursion, or filtered callee).
    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      BinaryFunction *DstFunc =
          DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr;
      if (!DstFunc) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: no function for symbol\n");
        return false;
      }

      if (DstFunc == Function) {
        LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                          << *DstFunc << "\n");
        ++RecursiveCallsites;
        if (IgnoreRecursiveCalls)
          return false;
      }
      if (Filter(*DstFunc)) {
        LLVM_DEBUG(if (opts::Verbosity > 1) dbgs()
                   << "BOLT-DEBUG: buildCallGraph: filtered " << *DstFunc
                   << '\n');
        return false;
      }

      const CallGraph::NodeId DstId = lookupNode(DstFunc);
      const bool IsValidCount = Count != COUNT_NO_PROFILE;
      // Without UseEdgeCounts (or without a profile) each callsite counts 1.
      const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
      if (!IsValidCount)
        ++NoProfileCallsites;
      Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
      LLVM_DEBUG(if (opts::Verbosity > 1) {
        dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
               << *DstFunc << " @ " << Offset << "\n";
      });
      return true;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function the symbol will be nullptr.
    // If there is no profiling data the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, "CallProfile");
        for (const IndirectCallProfile &CSI : ICSP)
          if (CSI.Symbol)
            Counts.emplace_back(CSI.Symbol, CSI.Count);
      } else {
        // Direct (or unresolved) call: use the block's execution count.
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(DstSym, Count);
      }

      return Counts;
    };

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested). If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                 << " for " << *Function << "\n");
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      // Normal path: walk the CFG layout and harvest call instructions.
      for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
        // Don't count calls from split blocks unless requested.
        if (BB->isSplit() && !IncludeSplitCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size
        // See BinaryFunction::estimateHotSize
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isSplit();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }

        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              // A call with no resolvable targets still counts as a callsite.
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Increase Offset if needed
          // Offset only tracks bytes of blocks counted in the size estimate,
          // so arc offsets stay within the (hot) function size.
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

#ifndef NDEBUG
  bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
  bool PrintInfo = false;
#endif
  // NOTE(review): the %u specifiers receive uint64_t arguments (and %.6lf a
  // double); consider PRIu64-style specifiers — confirm against llvm::format.
  if (PrintInfo || opts::Verbosity > 0)
    BC.outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
                        "(%u recursive), density = %.6lf, %u callsites not "
                        "processed, %u callsites with invalid profile, "
                        "used perf data for %u stale functions.\n",
                        Cg.numNodes(), TotalCallsites, RecursiveCallsites,
                        Cg.density(), NotProcessed, NoProfileCallsites,
                        NumFallbacks);

  // Optionally dump the graph for offline inspection (-dump-cg=<file>).
  if (opts::DumpCGDot.getNumOccurrences()) {
    Cg.printDot(opts::DumpCGDot, [&](CallGraph::NodeId Id) {
      return Cg.nodeIdToFunc(Id)->getPrintName();
    });
  }

  return Cg;
}
299
300} // namespace bolt
301} // namespace llvm
302