llvm-project

Форк
0
/
ReorderFunctions.cpp 
530 строк · 19.5 Кб
1
//===- bolt/Passes/ReorderFunctions.cpp - Function reordering pass --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements ReorderFunctions class.
10
//
11
//===----------------------------------------------------------------------===//
12

13
#include "bolt/Passes/ReorderFunctions.h"
14
#include "bolt/Passes/HFSort.h"
15
#include "bolt/Utils/Utils.h"
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/Support/CommandLine.h"
18
#include "llvm/Transforms/Utils/CodeLayout.h"
19
#include <fstream>
20

21
#define DEBUG_TYPE "hfsort"
22

23
using namespace llvm;
24

25
namespace opts {
26

27
extern cl::OptionCategory BoltOptCategory;
28
extern cl::opt<unsigned> Verbosity;
29
extern cl::opt<uint32_t> RandomSeed;
30

31
extern size_t padFunction(const bolt::BinaryFunction &Function);
32

33
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
34
cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
35
    "reorder-functions",
36
    cl::desc("reorder and cluster functions (works only with relocations)"),
37
    cl::init(bolt::ReorderFunctions::RT_NONE),
38
    cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none",
39
                          "do not reorder functions"),
40
               clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count",
41
                          "order by execution count"),
42
               clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
43
                          "use hfsort algorithm"),
44
               clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
45
                          "use cache-directed sort"),
46
               clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
47
                          "use cache-directed sort"),
48
               clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
49
                          "pettis-hansen", "use Pettis-Hansen algorithm"),
50
               clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random",
51
                          "reorder functions randomly"),
52
               clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
53
                          "use function order specified by -function-order")),
54
    cl::ZeroOrMore, cl::cat(BoltOptCategory),
55
    cl::callback([](const bolt::ReorderFunctions::ReorderType &option) {
56
      if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
57
        errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
58
               << " please use '-reorder-functions=cdsort' instead\n";
59
        ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
60
      }
61
    }));
62

63
static cl::opt<bool> ReorderFunctionsUseHotSize(
64
    "reorder-functions-use-hot-size",
65
    cl::desc("use a function's hot size when doing clustering"), cl::init(true),
66
    cl::cat(BoltOptCategory));
67

68
static cl::opt<std::string> FunctionOrderFile(
69
    "function-order",
70
    cl::desc("file containing an ordered list of functions to use for function "
71
             "reordering"),
72
    cl::cat(BoltOptCategory));
73

74
static cl::opt<std::string> GenerateFunctionOrderFile(
75
    "generate-function-order",
76
    cl::desc("file to dump the ordered list of functions to use for function "
77
             "reordering"),
78
    cl::cat(BoltOptCategory));
79

80
static cl::opt<std::string> LinkSectionsFile(
81
    "generate-link-sections",
82
    cl::desc("generate a list of function sections in a format suitable for "
83
             "inclusion in a linker script"),
84
    cl::cat(BoltOptCategory));
85

86
static cl::opt<bool>
87
    UseEdgeCounts("use-edge-counts",
88
                  cl::desc("use edge count data when doing clustering"),
89
                  cl::init(true), cl::cat(BoltOptCategory));
90

91
static cl::opt<bool> CgFromPerfData(
92
    "cg-from-perf-data",
93
    cl::desc("use perf data directly when constructing the call graph"
94
             " for stale functions"),
95
    cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory));
96

97
static cl::opt<bool> CgIgnoreRecursiveCalls(
98
    "cg-ignore-recursive-calls",
99
    cl::desc("ignore recursive calls when constructing the call graph"),
100
    cl::init(true), cl::cat(BoltOptCategory));
101

102
static cl::opt<bool> CgUseSplitHotSize(
103
    "cg-use-split-hot-size",
104
    cl::desc("use hot/cold data on basic blocks to determine hot sizes for "
105
             "call graph functions"),
106
    cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory));
107

108
} // namespace opts
109

110
namespace llvm {
111
namespace bolt {
112

113
using NodeId = CallGraph::NodeId;
114
using Arc = CallGraph::Arc;
115
using Node = CallGraph::Node;
116

117
void ReorderFunctions::reorder(BinaryContext &BC,
118
                               std::vector<Cluster> &&Clusters,
119
                               std::map<uint64_t, BinaryFunction> &BFs) {
120
  std::vector<uint64_t> FuncAddr(Cg.numNodes()); // Just for computing stats
121
  uint64_t TotalSize = 0;
122
  uint32_t Index = 0;
123

124
  // Set order of hot functions based on clusters.
125
  for (const Cluster &Cluster : Clusters) {
126
    for (const NodeId FuncId : Cluster.targets()) {
127
      Cg.nodeIdToFunc(FuncId)->setIndex(Index++);
128
      FuncAddr[FuncId] = TotalSize;
129
      TotalSize += Cg.size(FuncId);
130
    }
131
  }
132

133
  // Assign valid index for functions with valid profile.
134
  for (auto &It : BFs) {
135
    BinaryFunction &BF = It.second;
136
    if (!BF.hasValidIndex() && BF.hasValidProfile())
137
      BF.setIndex(Index++);
138
  }
139

140
  if (opts::ReorderFunctions == RT_NONE)
141
    return;
142

143
  printStats(BC, Clusters, FuncAddr);
144
}
145

146
void ReorderFunctions::printStats(BinaryContext &BC,
147
                                  const std::vector<Cluster> &Clusters,
148
                                  const std::vector<uint64_t> &FuncAddr) {
149
  if (opts::Verbosity == 0) {
150
#ifndef NDEBUG
151
    if (!DebugFlag || !isCurrentDebugType("hfsort"))
152
      return;
153
#else
154
    return;
155
#endif
156
  }
157

158
  bool PrintDetailed = opts::Verbosity > 1;
159
#ifndef NDEBUG
160
  PrintDetailed |=
161
      (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0);
162
#endif
163
  uint64_t TotalSize = 0;
164
  uint64_t CurPage = 0;
165
  uint64_t Hotfuncs = 0;
166
  double TotalDistance = 0;
167
  double TotalCalls = 0;
168
  double TotalCalls64B = 0;
169
  double TotalCalls4KB = 0;
170
  double TotalCalls2MB = 0;
171
  if (PrintDetailed)
172
    BC.outs() << "BOLT-INFO: Function reordering page layout\n"
173
              << "BOLT-INFO: ============== page 0 ==============\n";
174
  for (const Cluster &Cluster : Clusters) {
175
    if (PrintDetailed)
176
      BC.outs() << format(
177
          "BOLT-INFO: -------- density = %.3lf (%u / %u) --------\n",
178
          Cluster.density(), Cluster.samples(), Cluster.size());
179

180
    for (NodeId FuncId : Cluster.targets()) {
181
      if (Cg.samples(FuncId) > 0) {
182
        Hotfuncs++;
183

184
        if (PrintDetailed)
185
          BC.outs() << "BOLT-INFO: hot func " << *Cg.nodeIdToFunc(FuncId)
186
                    << " (" << Cg.size(FuncId) << ")\n";
187

188
        uint64_t Dist = 0;
189
        uint64_t Calls = 0;
190
        for (NodeId Dst : Cg.successors(FuncId)) {
191
          if (FuncId == Dst) // ignore recursive calls in stats
192
            continue;
193
          const Arc &Arc = *Cg.findArc(FuncId, Dst);
194
          const auto D = std::abs(FuncAddr[Arc.dst()] -
195
                                  (FuncAddr[FuncId] + Arc.avgCallOffset()));
196
          const double W = Arc.weight();
197
          if (D < 64 && PrintDetailed && opts::Verbosity > 2)
198
            BC.outs() << "BOLT-INFO: short (" << D << "B) call:\n"
199
                      << "BOLT-INFO:   Src: " << *Cg.nodeIdToFunc(FuncId)
200
                      << "\n"
201
                      << "BOLT-INFO:   Dst: " << *Cg.nodeIdToFunc(Dst) << "\n"
202
                      << "BOLT-INFO:   Weight = " << W << "\n"
203
                      << "BOLT-INFO:   AvgOffset = " << Arc.avgCallOffset()
204
                      << "\n";
205
          Calls += W;
206
          if (D < 64)
207
            TotalCalls64B += W;
208
          if (D < 4096)
209
            TotalCalls4KB += W;
210
          if (D < (2 << 20))
211
            TotalCalls2MB += W;
212
          Dist += Arc.weight() * D;
213
          if (PrintDetailed)
214
            BC.outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: "
215
                                "weight = %.0lf, callDist = %f\n",
216
                                Arc.src(), FuncAddr[Arc.src()],
217
                                Arc.avgCallOffset(), Arc.dst(),
218
                                FuncAddr[Arc.dst()], Arc.weight(), D);
219
        }
220
        TotalCalls += Calls;
221
        TotalDistance += Dist;
222
        TotalSize += Cg.size(FuncId);
223

224
        if (PrintDetailed) {
225
          BC.outs() << format("BOLT-INFO: start = %6u : avgCallDist = %lu : ",
226
                              TotalSize, Calls ? Dist / Calls : 0)
227
                    << Cg.nodeIdToFunc(FuncId)->getPrintName() << '\n';
228
          const uint64_t NewPage = TotalSize / HugePageSize;
229
          if (NewPage != CurPage) {
230
            CurPage = NewPage;
231
            BC.outs() << format(
232
                "BOLT-INFO: ============== page %u ==============\n", CurPage);
233
          }
234
        }
235
      }
236
    }
237
  }
238
  BC.outs() << "BOLT-INFO: Function reordering stats\n"
239
            << format("BOLT-INFO:  Number of hot functions: %u\n"
240
                      "BOLT-INFO:  Number of clusters: %lu\n",
241
                      Hotfuncs, Clusters.size())
242
            << format("BOLT-INFO:  Final average call distance = %.1lf "
243
                      "(%.0lf / %.0lf)\n",
244
                      TotalCalls ? TotalDistance / TotalCalls : 0,
245
                      TotalDistance, TotalCalls)
246
            << format("BOLT-INFO:  Total Calls = %.0lf\n", TotalCalls);
247
  if (TotalCalls)
248
    BC.outs()
249
        << format("BOLT-INFO:  Total Calls within 64B = %.0lf (%.2lf%%)\n",
250
                  TotalCalls64B, 100 * TotalCalls64B / TotalCalls)
251
        << format("BOLT-INFO:  Total Calls within 4KB = %.0lf (%.2lf%%)\n",
252
                  TotalCalls4KB, 100 * TotalCalls4KB / TotalCalls)
253
        << format("BOLT-INFO:  Total Calls within 2MB = %.0lf (%.2lf%%)\n",
254
                  TotalCalls2MB, 100 * TotalCalls2MB / TotalCalls);
255
}
256

257
Error ReorderFunctions::readFunctionOrderFile(
258
    std::vector<std::string> &FunctionNames) {
259
  std::ifstream FuncsFile(opts::FunctionOrderFile, std::ios::in);
260
  if (!FuncsFile)
261
    return createFatalBOLTError(Twine("Ordered functions file \"") +
262
                                Twine(opts::FunctionOrderFile) +
263
                                Twine("\" can't be opened."));
264

265
  std::string FuncName;
266
  while (std::getline(FuncsFile, FuncName))
267
    FunctionNames.push_back(FuncName);
268
  return Error::success();
269
}
270

271
Error ReorderFunctions::runOnFunctions(BinaryContext &BC) {
272
  auto &BFs = BC.getBinaryFunctions();
273
  if (opts::ReorderFunctions != RT_NONE &&
274
      opts::ReorderFunctions != RT_EXEC_COUNT &&
275
      opts::ReorderFunctions != RT_USER) {
276
    Cg = buildCallGraph(
277
        BC,
278
        [](const BinaryFunction &BF) {
279
          if (!BF.hasProfile())
280
            return true;
281
          if (BF.getState() != BinaryFunction::State::CFG)
282
            return true;
283
          return false;
284
        },
285
        opts::CgFromPerfData,
286
        /*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
287
        opts::CgUseSplitHotSize, opts::UseEdgeCounts,
288
        opts::CgIgnoreRecursiveCalls);
289
    Cg.normalizeArcWeights();
290
  }
291

292
  std::vector<Cluster> Clusters;
293

294
  switch (opts::ReorderFunctions) {
295
  case RT_NONE:
296
    break;
297
  case RT_EXEC_COUNT: {
298
    std::vector<BinaryFunction *> SortedFunctions(BFs.size());
299
    llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
300
                    [](BinaryFunction &BF) { return &BF; });
301
    llvm::stable_sort(SortedFunctions,
302
                      [&](const BinaryFunction *A, const BinaryFunction *B) {
303
                        if (A->isIgnored())
304
                          return false;
305
                        if (B->isIgnored())
306
                          return true;
307
                        const size_t PadA = opts::padFunction(*A);
308
                        const size_t PadB = opts::padFunction(*B);
309
                        if (!PadA || !PadB) {
310
                          if (PadA)
311
                            return true;
312
                          if (PadB)
313
                            return false;
314
                        }
315
                        if (!A->hasProfile())
316
                          return false;
317
                        if (!B->hasProfile())
318
                          return true;
319
                        return A->getExecutionCount() > B->getExecutionCount();
320
                      });
321
    uint32_t Index = 0;
322
    for (BinaryFunction *BF : SortedFunctions)
323
      if (BF->hasProfile()) {
324
        BF->setIndex(Index++);
325
        LLVM_DEBUG(if (opts::Verbosity > 1) {
326
          dbgs() << "BOLT-INFO: hot func " << BF->getPrintName() << " ("
327
                 << BF->getExecutionCount() << ")\n";
328
        });
329
      }
330
  } break;
331
  case RT_HFSORT:
332
    Clusters = clusterize(Cg);
333
    break;
334
  case RT_CDSORT: {
335
    // It is required that the sum of incoming arc weights is not greater
336
    // than the number of samples for every function. Ensuring the call graph
337
    // obeys the property before running the algorithm.
338
    Cg.adjustArcWeights();
339

340
    // Initialize CFG nodes and their data
341
    std::vector<uint64_t> FuncSizes;
342
    std::vector<uint64_t> FuncCounts;
343
    std::vector<codelayout::EdgeCount> CallCounts;
344
    std::vector<uint64_t> CallOffsets;
345
    for (NodeId F = 0; F < Cg.numNodes(); ++F) {
346
      FuncSizes.push_back(Cg.size(F));
347
      FuncCounts.push_back(Cg.samples(F));
348
      for (NodeId Succ : Cg.successors(F)) {
349
        const Arc &Arc = *Cg.findArc(F, Succ);
350
        CallCounts.push_back({F, Succ, uint64_t(Arc.weight())});
351
        CallOffsets.push_back(uint64_t(Arc.avgCallOffset()));
352
      }
353
    }
354

355
    // Run the layout algorithm.
356
    std::vector<uint64_t> Result = codelayout::computeCacheDirectedLayout(
357
        FuncSizes, FuncCounts, CallCounts, CallOffsets);
358

359
    // Create a single cluster from the computed order of hot functions.
360
    std::vector<CallGraph::NodeId> NodeOrder(Result.begin(), Result.end());
361
    Clusters.emplace_back(Cluster(NodeOrder, Cg));
362
  } break;
363
  case RT_PETTIS_HANSEN:
364
    Clusters = pettisAndHansen(Cg);
365
    break;
366
  case RT_RANDOM:
367
    std::srand(opts::RandomSeed);
368
    Clusters = randomClusters(Cg);
369
    break;
370
  case RT_USER: {
371
    // Build LTOCommonNameMap
372
    StringMap<std::vector<uint64_t>> LTOCommonNameMap;
373
    for (const BinaryFunction &BF : llvm::make_second_range(BFs))
374
      for (StringRef Name : BF.getNames())
375
        if (std::optional<StringRef> LTOCommonName = getLTOCommonName(Name))
376
          LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress());
377

378
    uint32_t Index = 0;
379
    uint32_t InvalidEntries = 0;
380
    std::vector<std::string> FunctionNames;
381
    if (Error E = readFunctionOrderFile(FunctionNames))
382
      return Error(std::move(E));
383

384
    for (const std::string &Function : FunctionNames) {
385
      std::vector<uint64_t> FuncAddrs;
386

387
      BinaryData *BD = BC.getBinaryDataByName(Function);
388
      if (!BD) {
389
        // If we can't find the main symbol name, look for alternates.
390
        uint32_t LocalID = 1;
391
        while (true) {
392
          const std::string FuncName = Function + "/" + std::to_string(LocalID);
393
          BD = BC.getBinaryDataByName(FuncName);
394
          if (BD)
395
            FuncAddrs.push_back(BD->getAddress());
396
          else
397
            break;
398
          LocalID++;
399
        }
400
        // Strip LTO suffixes
401
        if (std::optional<StringRef> CommonName = getLTOCommonName(Function))
402
          if (LTOCommonNameMap.contains(*CommonName))
403
            llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]);
404
      } else {
405
        FuncAddrs.push_back(BD->getAddress());
406
      }
407

408
      if (FuncAddrs.empty()) {
409
        if (opts::Verbosity >= 1)
410
          BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
411
                    << "for " << Function << "\n";
412
        ++InvalidEntries;
413
        continue;
414
      }
415

416
      for (const uint64_t FuncAddr : FuncAddrs) {
417
        const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr);
418
        assert(FuncBD);
419

420
        BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol());
421
        if (!BF) {
422
          if (opts::Verbosity >= 1)
423
            BC.errs() << "BOLT-WARNING: Reorder functions: can't find function "
424
                      << "for " << Function << "\n";
425
          ++InvalidEntries;
426
          break;
427
        }
428
        if (!BF->hasValidIndex())
429
          BF->setIndex(Index++);
430
        else if (opts::Verbosity > 0)
431
          BC.errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
432
                    << "\n";
433
      }
434
    }
435
    if (InvalidEntries)
436
      BC.errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
437
                << InvalidEntries << " entries in -function-order list\n";
438
  } break;
439

440
  default:
441
    llvm_unreachable("unexpected layout type");
442
  }
443

444
  reorder(BC, std::move(Clusters), BFs);
445

446
  BC.HasFinalizedFunctionOrder = true;
447

448
  std::unique_ptr<std::ofstream> FuncsFile;
449
  if (!opts::GenerateFunctionOrderFile.empty()) {
450
    FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
451
                                                std::ios::out);
452
    if (!FuncsFile) {
453
      BC.errs() << "BOLT-ERROR: ordered functions file "
454
                << opts::GenerateFunctionOrderFile << " cannot be opened\n";
455
      return createFatalBOLTError("");
456
    }
457
  }
458

459
  std::unique_ptr<std::ofstream> LinkSectionsFile;
460
  if (!opts::LinkSectionsFile.empty()) {
461
    LinkSectionsFile =
462
        std::make_unique<std::ofstream>(opts::LinkSectionsFile, std::ios::out);
463
    if (!LinkSectionsFile) {
464
      BC.errs() << "BOLT-ERROR: link sections file " << opts::LinkSectionsFile
465
                << " cannot be opened\n";
466
      return createFatalBOLTError("");
467
    }
468
  }
469

470
  if (FuncsFile || LinkSectionsFile) {
471
    std::vector<BinaryFunction *> SortedFunctions(BFs.size());
472
    llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(),
473
                    [](BinaryFunction &BF) { return &BF; });
474

475
    // Sort functions by index.
476
    llvm::stable_sort(SortedFunctions,
477
                      [](const BinaryFunction *A, const BinaryFunction *B) {
478
                        if (A->hasValidIndex() && B->hasValidIndex())
479
                          return A->getIndex() < B->getIndex();
480
                        if (A->hasValidIndex() && !B->hasValidIndex())
481
                          return true;
482
                        if (!A->hasValidIndex() && B->hasValidIndex())
483
                          return false;
484
                        return A->getAddress() < B->getAddress();
485
                      });
486

487
    for (const BinaryFunction *Func : SortedFunctions) {
488
      if (!Func->hasValidIndex())
489
        break;
490
      if (Func->isPLTFunction())
491
        continue;
492

493
      if (FuncsFile)
494
        *FuncsFile << Func->getOneName().str() << '\n';
495

496
      if (LinkSectionsFile) {
497
        const char *Indent = "";
498
        std::vector<StringRef> AllNames = Func->getNames();
499
        llvm::sort(AllNames);
500
        for (StringRef Name : AllNames) {
501
          const size_t SlashPos = Name.find('/');
502
          if (SlashPos != std::string::npos) {
503
            // Avoid duplicates for local functions.
504
            if (Name.find('/', SlashPos + 1) != std::string::npos)
505
              continue;
506
            Name = Name.substr(0, SlashPos);
507
          }
508
          *LinkSectionsFile << Indent << ".text." << Name.str() << '\n';
509
          Indent = " ";
510
        }
511
      }
512
    }
513

514
    if (FuncsFile) {
515
      FuncsFile->close();
516
      BC.outs() << "BOLT-INFO: dumped function order to "
517
                << opts::GenerateFunctionOrderFile << '\n';
518
    }
519

520
    if (LinkSectionsFile) {
521
      LinkSectionsFile->close();
522
      BC.outs() << "BOLT-INFO: dumped linker section order to "
523
                << opts::LinkSectionsFile << '\n';
524
    }
525
  }
526
  return Error::success();
527
}
528

529
} // namespace bolt
530
} // namespace llvm
531

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.