llvm-project
530 строк · 19.5 Кб
1//===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements ReorderFunctions class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/ReorderFunctions.h"
14#include "bolt/Passes/HFSort.h"
15#include "bolt/Utils/Utils.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Transforms/Utils/CodeLayout.h"
19#include <fstream>
20
21#define DEBUG_TYPE "hfsort"
22
23using namespace llvm;
24
25namespace opts {
26
27extern cl::OptionCategory BoltOptCategory;
28extern cl::opt<unsigned> Verbosity;
29extern cl::opt<uint32_t> RandomSeed;
30
31extern size_t padFunction(const bolt::BinaryFunction &Function);
32
33extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
34cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
35"reorder-functions",
36cl::desc("reorder and cluster functions (works only with relocations)"),
37cl::init(bolt::ReorderFunctions::RT_NONE),
38cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
39"do not reorder functions"),
40clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
41"order by execution count"),
42clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
43"use hfsort algorithm"),
44clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
45"use cache-directed sort"),
46clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
47"use cache-directed sort"),
48clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
49"pettis-hansen", "use Pettis-Hansen algorithm"),
50clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
51"reorder functions randomly"),
52clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
53"use function order specified by -function-order")),
54cl::ZeroOrMore, cl::cat(BoltOptCategory),
55cl::callback([](const bolt::ReorderFunctions::ReorderType &option) {
56if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
57errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
58<< " please use '-reorder-functions=cdsort' instead\n";
59ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
60}
61}));
62
63static cl::opt<bool> ReorderFunctionsUseHotSize(
64"reorder-functions-use-hot-size",
65cl::desc("use a function's hot size when doing clustering"), cl::init(true),
66cl::cat(BoltOptCategory));
67
68static cl::opt<std::string> FunctionOrderFile(
69"function-order",
70cl::desc("file containing an ordered list of functions to use for function "
71"reordering"),
72cl::cat(BoltOptCategory));
73
74static cl::opt<std::string> GenerateFunctionOrderFile(
75"generate-function-order",
76cl::desc("file to dump the ordered list of functions to use for function "
77"reordering"),
78cl::cat(BoltOptCategory));
79
80static cl::opt<std::string> LinkSectionsFile(
81"generate-link-sections",
82cl::desc("generate a list of function sections in a format suitable for "
83"inclusion in a linker script"),
84cl::cat(BoltOptCategory));
85
86static cl::opt<bool>
87UseEdgeCounts("use-edge-counts",
88cl::desc("use edge count data when doing clustering"),
89cl::init(true), cl::cat(BoltOptCategory));
90
91static cl::opt<bool> CgFromPerfData(
92"cg-from-perf-data",
93cl::desc("use perf data directly when constructing the call graph"
94" for stale functions"),
95cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
96
97static cl::opt<bool> CgIgnoreRecursiveCalls(
98"cg-ignore-recursive-calls",
99cl::desc("ignore recursive calls when constructing the call graph"),
100cl::init(true), cl::cat(BoltOptCategory));
101
102static cl::opt<bool> CgUseSplitHotSize(
103"cg-use-split-hot-size",
104cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
105"call graph functions"),
106cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
107
108} // namespace opts
109
110namespace llvm {
111namespace bolt {
112
113using NodeId = CallGraph::NodeId;
114using Arc = CallGraph::Arc;
115using Node = CallGraph::Node;
116
117void ReorderFunctions::reorder(BinaryContext &BC,
118std::vector<Cluster> &&Clusters,
119std::map<uint64_t, BinaryFunction> &BFs) {
120std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
121uint64_t TotalSize = 0;
122uint32_t Index = 0;
123
124// Set order of hot functions based on clusters.
125for (const Cluster &Cluster : Clusters) {
126for (const NodeId FuncId : Cluster.targets()) {
127Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
128FuncAddr[FuncId] = TotalSize;
129TotalSize += Cg.size(FuncId);
130}
131}
132
133// Assign valid index for functions with valid profile.
134for (auto &It : BFs) {
135BinaryFunction &BF = It.second;
136if (!BF.hasValidIndex() && BF.hasValidProfile())
137BF.setIndex(Index++);
138}
139
140if (opts::ReorderFunctions == RT_NONE)
141return;
142
143printStats(BC, Clusters, FuncAddr);
144}
145
146void ReorderFunctions::printStats(BinaryContext &BC,
147const std::vector<Cluster> &Clusters,
148const std::vector<uint64_t> &FuncAddr) {
149if (opts::Verbosity == 0) {
150#ifndef NDEBUG
151if (!DebugFlag || !isCurrentDebugType("hfsort"))
152return;
153#else
154return;
155#endif
156}
157
158bool PrintDetailed = opts::Verbosity > 1;
159#ifndef NDEBUG
160PrintDetailed |=
161(DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
162#endif
163uint64_t TotalSize = 0;
164uint64_t CurPage = 0;
165uint64_t Hotfuncs = 0;
166double TotalDistance = 0;
167double TotalCalls = 0;
168double TotalCalls64B = 0;
169double TotalCalls4KB = 0;
170double TotalCalls2MB = 0;
171if (PrintDetailed)
172BC.outs() << "BOLT-INFO: Function reordering page layout\n"
173<< "BOLT-INFO: ============== page 0 ==============\n";
174for (const Cluster &Cluster : Clusters) {
175if (PrintDetailed)
176BC.outs() << format(
177"BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
178Cluster.density(), Cluster.samples(), Cluster.size());
179
180for (NodeId FuncId : Cluster.targets()) {
181if (Cg.samples(FuncId) > 0) {
182Hotfuncs++;
183
184if (PrintDetailed)
185BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId)
186<< " (" << Cg.size(FuncId) << ")\n";
187
188uint64_t Dist = 0;
189uint64_t Calls = 0;
190for (NodeId Dst : Cg.successors(FuncId)) {
191if (FuncId == Dst) // ignore recursive calls in stats
192continue;
193const Arc &Arc = *Cg.findArc(FuncId, Dst);
194const auto D = std::abs(FuncAddr[Arc.dst()] -
195(FuncAddr[FuncId] + Arc.avgCallOffset()));
196const double W = Arc.weight();
197if (D < 64 && PrintDetailed && opts::Verbosity > 2)
198BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n"
199<< "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId)
200<< "\n"
201<< "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
202<< "BOLT-INFO: Weight = " << W << "\n"
203<< "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset()
204<< "\n";
205Calls += W;
206if (D < 64)
207TotalCalls64B += W;
208if (D < 4096)
209TotalCalls4KB += W;
210if (D < (2 << 20))
211TotalCalls2MB += W;
212Dist += Arc.weight() * D;
213if (PrintDetailed)
214BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
215"weight = %.0lf, callDist = %f\n",
216Arc.src(), FuncAddr[Arc.src()],
217Arc.avgCallOffset(), Arc.dst(),
218FuncAddr[Arc.dst()], Arc.weight(), D);
219}
220TotalCalls += Calls;
221TotalDistance += Dist;
222TotalSize += Cg.size(FuncId);
223
224if (PrintDetailed) {
225BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
226TotalSize, Calls ? Dist / Calls : 0)
227<< Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
228const uint64_t NewPage = TotalSize / HugePageSize;
229if (NewPage != CurPage) {
230CurPage = NewPage;
231BC.outs() << format(
232"BOLT-INFO: ============== page %u ==============\n", CurPage);
233}
234}
235}
236}
237}
238BC.outs() << "BOLT-INFO: Function reordering stats\n"
239<< format("BOLT-INFO: Number of hot functions: %u\n"
240"BOLT-INFO: Number of clusters: %lu\n",
241Hotfuncs, Clusters.size())
242<< format("BOLT-INFO: Final average call distance = %.1lf "
243"(%.0lf / %.0lf)\n",
244TotalCalls ? TotalDistance / TotalCalls : 0,
245TotalDistance, TotalCalls)
246<< format("BOLT-INFO: Total Calls = %.0lf\n", TotalCalls);
247if (TotalCalls)
248BC.outs()
249<< format("BOLT-INFO: Total Calls within 64B = %.0lf (%.2lf%%)\n",
250TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
251<< format("BOLT-INFO: Total Calls within 4KB = %.0lf (%.2lf%%)\n",
252TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
253<< format("BOLT-INFO: Total Calls within 2MB = %.0lf (%.2lf%%)\n",
254TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
255}
256
257Error ReorderFunctions::readFunctionOrderFile(
258std::vector<std::string> &FunctionNames) {
259std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
260if (!FuncsFile)
261return createFatalBOLTError(Twine("Ordered functions file \"") +
262Twine(opts::FunctionOrderFile) +
263Twine("\" can't be opened."));
264
265std::string FuncName;
266while (std::getline(FuncsFile, FuncName))
267FunctionNames.push_back(FuncName);
268return Error::success();
269}
270
271Error ReorderFunctions::runOnFunctions(BinaryContext &BC) {
272auto &BFs = BC.getBinaryFunctions();
273if (opts::ReorderFunctions != RT_NONE &&
274opts::ReorderFunctions != RT_EXEC_COUNT &&
275opts::ReorderFunctions != RT_USER) {
276Cg = buildCallGraph(
277BC,
278[](const BinaryFunction &BF) {
279if (!BF.hasProfile())
280return true;
281if (BF.getState() != BinaryFunction::State::CFG)
282return true;
283return false;
284},
285opts::CgFromPerfData,
286/*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
287opts::CgUseSplitHotSize, opts::UseEdgeCounts,
288opts::CgIgnoreRecursiveCalls);
289Cg.normalizeArcWeights();
290}
291
292std::vector<Cluster> Clusters;
293
294switch (opts::ReorderFunctions) {
295case RT_NONE:
296break;
297case RT_EXEC_COUNT: {
298std::vector<BinaryFunction *> SortedFunctions(BFs.size());
299llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
300[](BinaryFunction &BF) { return &BF; });
301llvm::stable_sort(SortedFunctions,
302[&](const BinaryFunction *A, const BinaryFunction *B) {
303if (A->isIgnored())
304return false;
305if (B->isIgnored())
306return true;
307const size_t PadA = opts::padFunction(*A);
308const size_t PadB = opts::padFunction(*B);
309if (!PadA || !PadB) {
310if (PadA)
311return true;
312if (PadB)
313return false;
314}
315if (!A->hasProfile())
316return false;
317if (!B->hasProfile())
318return true;
319return A->getExecutionCount() > B->getExecutionCount();
320});
321uint32_t Index = 0;
322for (BinaryFunction *BF : SortedFunctions)
323if (BF->hasProfile()) {
324BF->setIndex(Index++);
325LLVM_DEBUG(if (opts::Verbosity > 1) {
326dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
327<< BF->getExecutionCount() << ")\n";
328});
329}
330} break;
331case RT_HFSORT:
332Clusters = clusterize(Cg);
333break;
334case RT_CDSORT: {
335// It is required that the sum of incoming arc weights is not greater
336// than the number of samples for every function. Ensuring the call graph
337// obeys the property before running the algorithm.
338Cg.adjustArcWeights();
339
340// Initialize CFG nodes and their data
341std::vector<uint64_t> FuncSizes;
342std::vector<uint64_t> FuncCounts;
343std::vector<codelayout::EdgeCount> CallCounts;
344std::vector<uint64_t> CallOffsets;
345for (NodeId F = 0; F < Cg.numNodes(); ++F) {
346FuncSizes.push_back(Cg.size(F));
347FuncCounts.push_back(Cg.samples(F));
348for (NodeId Succ : Cg.successors(F)) {
349const Arc &Arc = *Cg.findArc(F, Succ);
350CallCounts.push_back({F, Succ, uint64_t(Arc.weight())});
351CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
352}
353}
354
355// Run the layout algorithm.
356std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
357FuncSizes, FuncCounts, CallCounts, CallOffsets);
358
359// Create a single cluster from the computed order of hot functions.
360std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
361Clusters.emplace_back(Cluster(NodeOrder, Cg));
362} break;
363case RT_PETTIS_HANSEN:
364Clusters = pettisAndHansen(Cg);
365break;
366case RT_RANDOM:
367std::srand(opts::RandomSeed);
368Clusters = randomClusters(Cg);
369break;
370case RT_USER: {
371// Build LTOCommonNameMap
372StringMap<std::vector<uint64_t>> LTOCommonNameMap;
373for (const BinaryFunction &BF : llvm::make_second_range(BFs))
374for (StringRef Name : BF.getNames())
375if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
376LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
377
378uint32_t Index = 0;
379uint32_t InvalidEntries = 0;
380std::vector<std::string> FunctionNames;
381if (Error E = readFunctionOrderFile(FunctionNames))
382return Error(std::move(E));
383
384for (const std::string &Function : FunctionNames) {
385std::vector<uint64_t> FuncAddrs;
386
387BinaryData *BD = BC.getBinaryDataByName(Function);
388if (!BD) {
389// If we can't find the main symbol name, look for alternates.
390uint32_t LocalID = 1;
391while (true) {
392const std::string FuncName = Function + "/" + std::to_string(LocalID);
393BD = BC.getBinaryDataByName(FuncName);
394if (BD)
395FuncAddrs.push_back(BD->getAddress());
396else
397break;
398LocalID++;
399}
400// Strip LTO suffixes
401if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
402if (LTOCommonNameMap.contains(*CommonName))
403llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
404} else {
405FuncAddrs.push_back(BD->getAddress());
406}
407
408if (FuncAddrs.empty()) {
409if (opts::Verbosity >= 1)
410BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
411<< "for " << Function << "\n";
412++InvalidEntries;
413continue;
414}
415
416for (const uint64_t FuncAddr : FuncAddrs) {
417const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
418assert(FuncBD);
419
420BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
421if (!BF) {
422if (opts::Verbosity >= 1)
423BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
424<< "for " << Function << "\n";
425++InvalidEntries;
426break;
427}
428if (!BF->hasValidIndex())
429BF->setIndex(Index++);
430else if (opts::Verbosity > 0)
431BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
432<< "\n";
433}
434}
435if (InvalidEntries)
436BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
437<< InvalidEntries << " entries in -function-order list\n";
438} break;
439
440default:
441llvm_unreachable("unexpected layout type");
442}
443
444reorder(BC, std::move(Clusters), BFs);
445
446BC.HasFinalizedFunctionOrder = true;
447
448std::unique_ptr<std::ofstream> FuncsFile;
449if (!opts::GenerateFunctionOrderFile.empty()) {
450FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
451std::ios::out);
452if (!FuncsFile) {
453BC.errs() << "BOLT-ERROR: ordered functions file "
454<< opts::GenerateFunctionOrderFile << " cannot be opened\n";
455return createFatalBOLTError("");
456}
457}
458
459std::unique_ptr<std::ofstream> LinkSectionsFile;
460if (!opts::LinkSectionsFile.empty()) {
461LinkSectionsFile =
462std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
463if (!LinkSectionsFile) {
464BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
465<< " cannot be opened\n";
466return createFatalBOLTError("");
467}
468}
469
470if (FuncsFile || LinkSectionsFile) {
471std::vector<BinaryFunction *> SortedFunctions(BFs.size());
472llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
473[](BinaryFunction &BF) { return &BF; });
474
475// Sort functions by index.
476llvm::stable_sort(SortedFunctions,
477[](const BinaryFunction *A, const BinaryFunction *B) {
478if (A->hasValidIndex() && B->hasValidIndex())
479return A->getIndex() < B->getIndex();
480if (A->hasValidIndex() && !B->hasValidIndex())
481return true;
482if (!A->hasValidIndex() && B->hasValidIndex())
483return false;
484return A->getAddress() < B->getAddress();
485});
486
487for (const BinaryFunction *Func : SortedFunctions) {
488if (!Func->hasValidIndex())
489break;
490if (Func->isPLTFunction())
491continue;
492
493if (FuncsFile)
494*FuncsFile << Func->getOneName().str() << '\n';
495
496if (LinkSectionsFile) {
497const char *Indent = "";
498std::vector<StringRef> AllNames = Func->getNames();
499llvm::sort(AllNames);
500for (StringRef Name : AllNames) {
501const size_t SlashPos = Name.find('/');
502if (SlashPos != std::string::npos) {
503// Avoid duplicates for local functions.
504if (Name.find('/', SlashPos + 1) != std::string::npos)
505continue;
506Name = Name.substr(0, SlashPos);
507}
508*LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
509Indent = " ";
510}
511}
512}
513
514if (FuncsFile) {
515FuncsFile->close();
516BC.outs() << "BOLT-INFO: dumped function order to "
517<< opts::GenerateFunctionOrderFile << '\n';
518}
519
520if (LinkSectionsFile) {
521LinkSectionsFile->close();
522BC.outs() << "BOLT-INFO: dumped linker section order to "
523<< opts::LinkSectionsFile << '\n';
524}
525}
526return Error::success();
527}
528
529} // namespace bolt
530} // namespace llvm
531