llvm-project

Форк
0
1269 строк · 45.8 Кб
1
//===-- PerfReader.cpp - perfscript reader  ---------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
#include "PerfReader.h"
9
#include "ProfileGenerator.h"
10
#include "llvm/ADT/SmallString.h"
11
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
12
#include "llvm/Support/FileSystem.h"
13
#include "llvm/Support/Process.h"
14
#include "llvm/Support/ToolOutputFile.h"
15

16
#define DEBUG_TYPE "perf-reader"
17

18
// Command-line options used by the perf reader. Options defined without
// `static` have external linkage; the `extern` declarations at the end refer
// to options defined in other translation units.

// --skip-symbolization: dump the unsymbolized profile instead of a
// symbolized one.
cl::opt<bool> SkipSymbolization(
    "skip-symbolization",
    cl::desc("Dump the unsymbolized profile to the "
             "output file. It will show unwinder "
             "output for CS profile generation."));

// --show-mmap-events: print binary load events.
static cl::opt<bool> ShowMmapEvents(
    "show-mmap-events",
    cl::desc("Print binary load events."));

// --use-offset (default: true): write/read offsets rather than virtual
// addresses in the unsymbolized profile.
static cl::opt<bool> UseOffset(
    "use-offset", cl::init(true),
    cl::desc("Work with `--skip-symbolization` or "
             "`--unsymbolized-profile` to write/read the "
             "offset instead of virtual address."));

// --use-first-loadable-segment-as-base: selects which base address is
// subtracted when offsets are produced.
static cl::opt<bool> UseLoadableSegmentAsBase(
    "use-first-loadable-segment-as-base",
    cl::desc("Use first loadable segment address as base address "
             "for offsets in unsymbolized profile. By default "
             "first executable segment address is used"));

// --ignore-stack-samples: drop the call stack part of hybrid samples.
static cl::opt<bool> IgnoreStackSamples(
    "ignore-stack-samples",
    cl::desc("Ignore call stack samples for hybrid samples "
             "and produce context-insensitive profile."));

// --show-detailed-warning: enables the per-item warnings (for example the
// per-call-site list printed by HybridPerfReader::unwindSamples).
cl::opt<bool> ShowDetailedWarning(
    "show-detailed-warning",
    cl::desc("Show detailed warning message."));

// Options defined elsewhere.
extern cl::opt<std::string> PerfTraceFilename;
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
extern cl::opt<std::string> OutputFilename;
49

50
namespace llvm {
51
namespace sampleprof {
52

53
// Unwind through a call branch: the source of the current LBR entry is the
// call site, so after this step the instruction pointer sits in the caller.
//
// When the parent frame of the current leaf already is that call site, the
// leaf is simply popped. Otherwise the 2nd frame is considered missing -
// which happens when the stack sample was taken while the IP was inside a
// prolog/epilog and the frame chain was not set up yet - and the leaf is
// switched to the call site instead.
// TODO: Currently every address that can't match the 2nd frame is assumed to
// be in prolog/epilog. In the future, switch to a pro/epi tracker (Dwarf CFI)
// for a precise check.
void VirtualUnwinder::unwindCall(UnwindState &State) {
  const uint64_t CallSite = State.getCurrentLBRSource();
  auto *Caller = State.getParentFrame();

  if (Caller != State.getDummyRootPtr() && Caller->Address == CallSite) {
    // The stack sample agrees with the LBR entry.
    State.popFrame();
  } else {
    State.switchToFrame(CallSite);
    // A mismatch is only counted when there was a real parent frame to
    // compare against.
    if (Caller != State.getDummyRootPtr()) {
      if (CallSite == ExternalAddr)
        ++NumMismatchedExtCallBranch;
      else
        ++NumMismatchedProEpiBranch;
    }
  }

  State.InstPtr.update(CallSite);
}
76

77
// Record the linearly executed range between the target of the current LBR
// entry and the current instruction pointer, attributing `Repeat` samples to
// it.
//
// Ranges touching an external address are filtered first:
//  - both ends external: a valid state (execution ran into external code and
//    came back). The call frame under the external frame stays valid and can
//    be unwound later, so the range is just not recorded.
//  - exactly one end external: the LBR trace contains a standalone external
//    address that failed to pair with another one, likely due to an
//    interrupt jmp or a broken perf script. The state is marked invalid.
void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
  InstructionPointer &IP = State.InstPtr;
  const uint64_t RangeStart = State.getCurrentLBRTarget();
  uint64_t RangeEnd = IP.Address;

  const bool EndIsExternal = RangeEnd == ExternalAddr;
  const bool StartIsExternal = RangeStart == ExternalAddr;
  if (EndIsExternal || StartIsExternal) {
    if (EndIsExternal && StartIsExternal) {
      ++NumPairedExtAddr;
    } else {
      ++NumUnpairedExtAddr;
      State.setInvalid();
    }
    return;
  }

  // Skip unwinding the rest of the LBR trace when a bogus range is seen.
  if (!isValidFallThroughRange(RangeStart, RangeEnd, Binary)) {
    State.setInvalid();
    return;
  }

  if (Binary->usePseudoProbes()) {
    // The top frame probe is not needed since it is extracted from the range.
    // With pseudo probes the outcome of virtual unwinding is a map from a
    // context key to the address range being unwound, so no linear walk is
    // required: the range is recorded as is and converted to a list of
    // pseudo probes in ProfileGenerator.
    State.getParentFrame()->recordRangeCount(RangeStart, RangeEnd, Repeat);
    return;
  }

  // Walk the range backwards and split it wherever the inline context
  // changes. For example:
  // [0x01] ... main:1          # Target
  // [0x02] ... main:2
  // [0x03] ... main:3 @ foo:1
  // [0x04] ... main:3 @ foo:2
  // [0x05] ... main:3 @ foo:3
  // [0x06] ... main:4
  // [0x07] ... main:5          # End
  // It will be recorded:
  // [main:*]         : [0x06, 0x07], [0x01, 0x02]
  // [main:3 @ foo:*] : [0x03, 0x05]
  while (IP.Address > RangeStart) {
    const uint64_t UpperIP = IP.Address;
    IP.backward();
    // Same inline context: keep extending the current segment.
    if (Binary->inlineContextEqual(UpperIP, IP.Address))
      continue;
    // Implicit call/return due to inlining: close the segment that starts at
    // UpperIP and begin a new one ending at the new IP.
    State.switchToFrame(UpperIP);
    State.CurrentLeafFrame->recordRangeCount(UpperIP, RangeEnd, Repeat);
    RangeEnd = IP.Address;
  }
  assert(IP.Address == RangeStart &&
         "The last one must be the target address.");

  // Record the remaining range, [0x01, 0x02] in the example.
  State.switchToFrame(IP.Address);
  State.CurrentLeafFrame->recordRangeCount(IP.Address, RangeEnd, Repeat);
}
146

147
// Unwind through a return branch. Walking the LBR backwards, a return means
// an extra frame has to be added: the leaf is first switched to the call
// address derived from the return target, then the return instruction
// itself (the LBR source) is pushed as the new leaf.
void VirtualUnwinder::unwindReturn(UnwindState &State) {
  const LBREntry &ReturnBranch = State.getCurrentLBR();

  State.switchToFrame(Binary->getCallAddrFromFrameAddr(ReturnBranch.Target));

  const uint64_t ReturnSite = ReturnBranch.Source;
  State.pushFrame(ReturnSite);
  State.InstPtr.update(ReturnSite);
}
155

156
// Unwind through a plain branch: the leaf frame and the instruction pointer
// both move to the branch source.
// TODO: Tolerate tail call for now, as we may see tail call from libraries.
// This is only for intra function branches, excluding tail calls.
void VirtualUnwinder::unwindBranch(UnwindState &State) {
  const uint64_t BranchSite = State.getCurrentLBRSource();
  State.switchToFrame(BranchSite);
  State.InstPtr.update(BranchSite);
}
163

164
std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
165
  std::shared_ptr<StringBasedCtxKey> KeyStr =
166
      std::make_shared<StringBasedCtxKey>();
167
  KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined);
168
  return KeyStr;
169
}
170

171
std::shared_ptr<AddrBasedCtxKey> AddressStack::getContextKey() {
172
  std::shared_ptr<AddrBasedCtxKey> KeyStr = std::make_shared<AddrBasedCtxKey>();
173
  KeyStr->Context = Stack;
174
  CSProfileGenerator::compressRecursionContext<uint64_t>(KeyStr->Context);
175
  CSProfileGenerator::trimContext<uint64_t>(KeyStr->Context);
176
  return KeyStr;
177
}
178

179
template <typename T>
180
void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
181
                                              T &Stack) {
182
  if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
183
    return;
184

185
  std::shared_ptr<ContextKey> Key = Stack.getContextKey();
186
  if (Key == nullptr)
187
    return;
188
  auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
189
  SampleCounter &SCounter = Ret.first->second;
190
  for (auto &I : Cur->RangeSamples)
191
    SCounter.recordRangeCount(std::get<0>(I), std::get<1>(I), std::get<2>(I));
192

193
  for (auto &I : Cur->BranchSamples)
194
    SCounter.recordBranchCount(std::get<0>(I), std::get<1>(I), std::get<2>(I));
195
}
196

197
template <typename T>
198
void VirtualUnwinder::collectSamplesFromFrameTrie(
199
    UnwindState::ProfiledFrame *Cur, T &Stack) {
200
  if (!Cur->isDummyRoot()) {
201
    // Truncate the context for external frame since this isn't a real call
202
    // context the compiler will see.
203
    if (Cur->isExternalFrame() || !Stack.pushFrame(Cur)) {
204
      // Process truncated context
205
      // Start a new traversal ignoring its bottom context
206
      T EmptyStack(Binary);
207
      collectSamplesFromFrame(Cur, EmptyStack);
208
      for (const auto &Item : Cur->Children) {
209
        collectSamplesFromFrameTrie(Item.second.get(), EmptyStack);
210
      }
211

212
      // Keep note of untracked call site and deduplicate them
213
      // for warning later.
214
      if (!Cur->isLeafFrame())
215
        UntrackedCallsites.insert(Cur->Address);
216

217
      return;
218
    }
219
  }
220

221
  collectSamplesFromFrame(Cur, Stack);
222
  // Process children frame
223
  for (const auto &Item : Cur->Children) {
224
    collectSamplesFromFrameTrie(Item.second.get(), Stack);
225
  }
226
  // Recover the call stack
227
  Stack.popFrame();
228
}
229

230
void VirtualUnwinder::collectSamplesFromFrameTrie(
231
    UnwindState::ProfiledFrame *Cur) {
232
  if (Binary->usePseudoProbes()) {
233
    AddressStack Stack(Binary);
234
    collectSamplesFromFrameTrie<AddressStack>(Cur, Stack);
235
  } else {
236
    FrameStack Stack(Binary);
237
    collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
238
  }
239
}
240

241
void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
242
                                        UnwindState &State, uint64_t Repeat) {
243
  if (Branch.Target == ExternalAddr)
244
    return;
245

246
  // Record external-to-internal pattern on the trie root, it later can be
247
  // used for generating head samples.
248
  if (Branch.Source == ExternalAddr) {
249
    State.getDummyRootPtr()->recordBranchCount(Branch.Source, Branch.Target,
250
                                               Repeat);
251
    return;
252
  }
253

254
  if (Binary->usePseudoProbes()) {
255
    // Same as recordRangeCount, We don't need to top frame probe since we will
256
    // extract it from branch's source address
257
    State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
258
                                              Repeat);
259
  } else {
260
    State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
261
                                              Repeat);
262
  }
263
}
264

265
bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
266
  // Capture initial state as starting point for unwinding.
267
  UnwindState State(Sample, Binary);
268

269
  // Sanity check - making sure leaf of LBR aligns with leaf of stack sample
270
  // Stack sample sometimes can be unreliable, so filter out bogus ones.
271
  if (!State.validateInitialState())
272
    return false;
273

274
  NumTotalBranches += State.LBRStack.size();
275
  // Now process the LBR samples in parrallel with stack sample
276
  // Note that we do not reverse the LBR entry order so we can
277
  // unwind the sample stack as we walk through LBR entries.
278
  while (State.hasNextLBR()) {
279
    State.checkStateConsistency();
280

281
    // Do not attempt linear unwind for the leaf range as it's incomplete.
282
    if (!State.IsLastLBR()) {
283
      // Unwind implicit calls/returns from inlining, along the linear path,
284
      // break into smaller sub section each with its own calling context.
285
      unwindLinear(State, Repeat);
286
    }
287

288
    // Save the LBR branch before it gets unwound.
289
    const LBREntry &Branch = State.getCurrentLBR();
290
    if (isCallState(State)) {
291
      // Unwind calls - we know we encountered call if LBR overlaps with
292
      // transition between leaf the 2nd frame. Note that for calls that
293
      // were not in the original stack sample, we should have added the
294
      // extra frame when processing the return paired with this call.
295
      unwindCall(State);
296
    } else if (isReturnState(State)) {
297
      // Unwind returns - check whether the IP is indeed at a return
298
      // instruction
299
      unwindReturn(State);
300
    } else if (isValidState(State)) {
301
      // Unwind branches
302
      unwindBranch(State);
303
    } else {
304
      // Skip unwinding the rest of LBR trace. Reset the stack and update the
305
      // state so that the rest of the trace can still be processed as if they
306
      // do not have stack samples.
307
      State.clearCallStack();
308
      State.InstPtr.update(State.getCurrentLBRSource());
309
      State.pushFrame(State.InstPtr.Address);
310
    }
311

312
    State.advanceLBR();
313
    // Record `branch` with calling context after unwinding.
314
    recordBranchCount(Branch, State, Repeat);
315
  }
316
  // As samples are aggregated on trie, record them into counter map
317
  collectSamplesFromFrameTrie(State.getDummyRootPtr());
318

319
  return true;
320
}
321

322
std::unique_ptr<PerfReaderBase>
323
PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
324
                       std::optional<int32_t> PIDFilter) {
325
  std::unique_ptr<PerfReaderBase> PerfReader;
326

327
  if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) {
328
    PerfReader.reset(
329
        new UnsymbolizedProfileReader(Binary, PerfInput.InputFile));
330
    return PerfReader;
331
  }
332

333
  // For perf data input, we need to convert them into perf script first.
334
  // If this is a kernel perf file, there is no need for retrieving PIDs.
335
  if (PerfInput.Format == PerfFormat::PerfData)
336
    PerfInput = PerfScriptReader::convertPerfDataToTrace(
337
        Binary, Binary->isKernel(), PerfInput, PIDFilter);
338

339
  assert((PerfInput.Format == PerfFormat::PerfScript) &&
340
         "Should be a perfscript!");
341

342
  PerfInput.Content =
343
      PerfScriptReader::checkPerfScriptType(PerfInput.InputFile);
344
  if (PerfInput.Content == PerfContent::LBRStack) {
345
    PerfReader.reset(
346
        new HybridPerfReader(Binary, PerfInput.InputFile, PIDFilter));
347
  } else if (PerfInput.Content == PerfContent::LBR) {
348
    PerfReader.reset(new LBRPerfReader(Binary, PerfInput.InputFile, PIDFilter));
349
  } else {
350
    exitWithError("Unsupported perfscript!");
351
  }
352

353
  return PerfReader;
354
}
355

356
// Convert a perf data file into a perf script trace by running the `perf`
// executable found on PATH.
//
// Unless `SkipPID` is set, `perf script` is run a first time to list mmap
// events; the PIDs of the events matching `Binary` (and `PIDFilter`, when
// given) are collected and used to restrict the second run, which dumps the
// `ip,brstack` fields. Both runs redirect stdout to the same uniquely named
// temporary file and stderr to "<that file>.err"; both files are registered
// in TempFileCleanups.
//
// Returns a PerfInputFile naming the trace file, with PerfScript format and
// unknown content.
// NOTE(review): the results of both ExecuteAndWait() calls are ignored, so a
// failing `perf script` run is not detected here.
PerfInputFile
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
                                         PerfInputFile &File,
                                         std::optional<int32_t> PIDFilter) {
  StringRef PerfData = File.InputFile;

  auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
  if (!PerfExecutable) {
    exitWithError("Perf not found.");
  }
  std::string PerfPath = *PerfExecutable;

  SmallString<128> PerfTraceFile;
  sys::fs::createUniquePath("perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp",
                            PerfTraceFile, /*MakeAbsolute=*/true);
  std::string ErrorFile = std::string(PerfTraceFile) + ".err";
  std::optional<StringRef> Redirects[] = {std::nullopt,             // Stdin
                                          StringRef(PerfTraceFile), // Stdout
                                          StringRef(ErrorFile)};    // Stderr
  PerfScriptReader::TempFileCleanups.emplace_back(PerfTraceFile);
  PerfScriptReader::TempFileCleanups.emplace_back(ErrorFile);

  // Comma separated list of the PIDs to keep; stays empty when SkipPID.
  std::string PIDs;
  if (!SkipPID) {
    // First run: retrieve the PIDs matching the binary we're interested in.
    StringRef ScriptMMapArgs[] = {PerfPath, "script",   "--show-mmap-events",
                                  "-F",     "comm,pid", "-i",
                                  PerfData};
    sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);

    std::unordered_set<int32_t> SeenPIDs;
    for (TraceStream TraceIt(PerfTraceFile); !TraceIt.isAtEoF();
         TraceIt.advance()) {
      MMapEvent MMap;
      if (!isMMapEvent(TraceIt.getCurrentLine()) ||
          !extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
        continue;

      // Every PID is remembered, but only listed once and only when it
      // passes the filter.
      const bool FirstSeen = SeenPIDs.emplace(MMap.PID).second;
      if (!FirstSeen || (PIDFilter && MMap.PID != *PIDFilter))
        continue;

      if (!PIDs.empty()) {
        PIDs.append(",");
      }
      PIDs.append(utostr(MMap.PID));
    }

    if (PIDs.empty()) {
      exitWithError("No relevant mmap event is found in perf data.");
    }
  }

  // Second run: retrieve the events, restricted to the PIDs collected above
  // when there are any.
  SmallVector<StringRef, 8> ScriptSampleArgs = {
      PerfPath, "script", "--show-mmap-events", "-F", "ip,brstack",
      "-i",     PerfData};
  if (!PIDs.empty()) {
    ScriptSampleArgs.push_back("--pid");
    ScriptSampleArgs.push_back(PIDs);
  }
  sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, std::nullopt, Redirects);

  return {std::string(PerfTraceFile), PerfFormat::PerfScript,
          PerfContent::UnknownContent};
}
425

426
// Return the file name component of `Path`.
//
// `UseBackSlash` selects Windows backslash path parsing instead of the
// native style. In that mode a single trailing '\r' is also stripped, in
// case the file the path was read from uses \r\n as newline.
//
// An empty file name (for example from an empty `Path`) is returned as is;
// the emptiness check is required because StringRef::back() must not be
// called on an empty string.
static StringRef filename(StringRef Path, bool UseBackSlash) {
  llvm::sys::path::Style PathStyle =
      UseBackSlash ? llvm::sys::path::Style::windows_backslash
                   : llvm::sys::path::Style::native;
  StringRef FileName = llvm::sys::path::filename(Path, PathStyle);

  // In case this file use \r\n as newline.
  if (UseBackSlash && !FileName.empty() && FileName.back() == '\r')
    return FileName.drop_back();

  return FileName;
}
438

439
void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
440
  // Drop the event which doesn't belong to user-provided binary
441
  StringRef BinaryName = filename(Event.BinaryPath, Binary->isCOFF());
442
  bool IsKernel = Binary->isKernel();
443
  if (!IsKernel && Binary->getName() != BinaryName)
444
    return;
445
  if (IsKernel && !Binary->isKernelImageName(BinaryName))
446
    return;
447

448
  // Drop the event if process does not match pid filter
449
  if (PIDFilter && Event.PID != *PIDFilter)
450
    return;
451

452
  // Drop the event if its image is loaded at the same address
453
  if (Event.Address == Binary->getBaseAddress()) {
454
    Binary->setIsLoadedByMMap(true);
455
    return;
456
  }
457

458
  if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) {
459
    // A binary image could be unloaded and then reloaded at different
460
    // place, so update binary load address.
461
    // Only update for the first executable segment and assume all other
462
    // segments are loaded at consecutive memory addresses, which is the case on
463
    // X64.
464
    Binary->setBaseAddress(Event.Address);
465
    Binary->setIsLoadedByMMap(true);
466
  } else {
467
    // Verify segments are loaded consecutively.
468
    const auto &Offsets = Binary->getTextSegmentOffsets();
469
    auto It = llvm::lower_bound(Offsets, Event.Offset);
470
    if (It != Offsets.end() && *It == Event.Offset) {
471
      // The event is for loading a separate executable segment.
472
      auto I = std::distance(Offsets.begin(), It);
473
      const auto &PreferredAddrs = Binary->getPreferredTextSegmentAddresses();
474
      if (PreferredAddrs[I] - Binary->getPreferredBaseAddress() !=
475
          Event.Address - Binary->getBaseAddress())
476
        exitWithError("Executable segments not loaded consecutively");
477
    } else {
478
      if (It == Offsets.begin())
479
        exitWithError("File offset not found");
480
      else {
481
        // Find the segment the event falls in. A large segment could be loaded
482
        // via multiple mmap calls with consecutive memory addresses.
483
        --It;
484
        assert(*It < Event.Offset);
485
        if (Event.Offset - *It != Event.Address - Binary->getBaseAddress())
486
          exitWithError("Segment not loaded by consecutive mmaps");
487
      }
488
    }
489
  }
490
}
491

492
static std::string getContextKeyStr(ContextKey *K,
493
                                    const ProfiledBinary *Binary) {
494
  if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
495
    return SampleContext::getContextString(CtxKey->Context);
496
  } else if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(K)) {
497
    std::ostringstream OContextStr;
498
    for (uint32_t I = 0; I < CtxKey->Context.size(); I++) {
499
      if (OContextStr.str().size())
500
        OContextStr << " @ ";
501
      uint64_t Address = CtxKey->Context[I];
502
      if (UseOffset) {
503
        if (UseLoadableSegmentAsBase)
504
          Address -= Binary->getFirstLoadableAddress();
505
        else
506
          Address -= Binary->getPreferredBaseAddress();
507
      }
508
      OContextStr << "0x"
509
                  << utohexstr(Address,
510
                               /*LowerCase=*/true);
511
    }
512
    return OContextStr.str();
513
  } else {
514
    llvm_unreachable("unexpected key type");
515
  }
516
}
517

518
void HybridPerfReader::unwindSamples() {
519
  VirtualUnwinder Unwinder(&SampleCounters, Binary);
520
  for (const auto &Item : AggregatedSamples) {
521
    const PerfSample *Sample = Item.first.getPtr();
522
    Unwinder.unwind(Sample, Item.second);
523
  }
524

525
  // Warn about untracked frames due to missing probes.
526
  if (ShowDetailedWarning) {
527
    for (auto Address : Unwinder.getUntrackedCallsites())
528
      WithColor::warning() << "Profile context truncated due to missing probe "
529
                           << "for call instruction at "
530
                           << format("0x%" PRIx64, Address) << "\n";
531
  }
532

533
  emitWarningSummary(Unwinder.getUntrackedCallsites().size(),
534
                     SampleCounters.size(),
535
                     "of profiled contexts are truncated due to missing probe "
536
                     "for call instruction.");
537

538
  emitWarningSummary(
539
      Unwinder.NumMismatchedExtCallBranch, Unwinder.NumTotalBranches,
540
      "of branches'source is a call instruction but doesn't match call frame "
541
      "stack, likely due to unwinding error of external frame.");
542

543
  emitWarningSummary(Unwinder.NumPairedExtAddr * 2, Unwinder.NumTotalBranches,
544
                     "of branches containing paired external address.");
545

546
  emitWarningSummary(Unwinder.NumUnpairedExtAddr, Unwinder.NumTotalBranches,
547
                     "of branches containing external address but doesn't have "
548
                     "another external address to pair, likely due to "
549
                     "interrupt jmp or broken perf script.");
550

551
  emitWarningSummary(
552
      Unwinder.NumMismatchedProEpiBranch, Unwinder.NumTotalBranches,
553
      "of branches'source is a call instruction but doesn't match call frame "
554
      "stack, likely due to frame in prolog/epilog.");
555

556
  emitWarningSummary(Unwinder.NumMissingExternalFrame,
557
                     Unwinder.NumExtCallBranch,
558
                     "of artificial call branches but doesn't have an external "
559
                     "frame to match.");
560
}
561

562
bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
563
                                       SmallVectorImpl<LBREntry> &LBRStack) {
564
  // The raw format of LBR stack is like:
565
  // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
566
  //                           ... 0x4005c8/0x4005dc/P/-/-/0
567
  // It's in FIFO order and separated by whitespace.
568
  SmallVector<StringRef, 32> Records;
569
  TraceIt.getCurrentLine().rtrim().split(Records, " ", -1, false);
570
  auto WarnInvalidLBR = [](TraceStream &TraceIt) {
571
    WithColor::warning() << "Invalid address in LBR record at line "
572
                         << TraceIt.getLineNumber() << ": "
573
                         << TraceIt.getCurrentLine() << "\n";
574
  };
575

576
  // Skip the leading instruction pointer.
577
  size_t Index = 0;
578
  uint64_t LeadingAddr;
579
  if (!Records.empty() && !Records[0].contains('/')) {
580
    if (Records[0].getAsInteger(16, LeadingAddr)) {
581
      WarnInvalidLBR(TraceIt);
582
      TraceIt.advance();
583
      return false;
584
    }
585
    Index = 1;
586
  }
587

588
  // Now extract LBR samples - note that we do not reverse the
589
  // LBR entry order so we can unwind the sample stack as we walk
590
  // through LBR entries.
591
  while (Index < Records.size()) {
592
    auto &Token = Records[Index++];
593
    if (Token.size() == 0)
594
      continue;
595

596
    SmallVector<StringRef, 8> Addresses;
597
    Token.split(Addresses, "/");
598
    uint64_t Src;
599
    uint64_t Dst;
600

601
    // Stop at broken LBR records.
602
    if (Addresses.size() < 2 || Addresses[0].substr(2).getAsInteger(16, Src) ||
603
        Addresses[1].substr(2).getAsInteger(16, Dst)) {
604
      WarnInvalidLBR(TraceIt);
605
      break;
606
    }
607

608
    // Canonicalize to use preferred load address as base address.
609
    Src = Binary->canonicalizeVirtualAddress(Src);
610
    Dst = Binary->canonicalizeVirtualAddress(Dst);
611
    bool SrcIsInternal = Binary->addressIsCode(Src);
612
    bool DstIsInternal = Binary->addressIsCode(Dst);
613
    if (!SrcIsInternal)
614
      Src = ExternalAddr;
615
    if (!DstIsInternal)
616
      Dst = ExternalAddr;
617
    // Filter external-to-external case to reduce LBR trace size.
618
    if (!SrcIsInternal && !DstIsInternal)
619
      continue;
620

621
    LBRStack.emplace_back(LBREntry(Src, Dst));
622
  }
623
  TraceIt.advance();
624
  return !LBRStack.empty();
625
}
626

627
bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
628
                                        SmallVectorImpl<uint64_t> &CallStack) {
629
  // The raw format of call stack is like:
630
  //            4005dc      # leaf frame
631
  //	          400634
632
  //	          400684      # root frame
633
  // It's in bottom-up order with each frame in one line.
634

635
  // Extract stack frames from sample
636
  while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(" 0x")) {
637
    StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
638
    uint64_t FrameAddr = 0;
639
    if (FrameStr.getAsInteger(16, FrameAddr)) {
640
      // We might parse a non-perf sample line like empty line and comments,
641
      // skip it
642
      TraceIt.advance();
643
      return false;
644
    }
645
    TraceIt.advance();
646

647
    FrameAddr = Binary->canonicalizeVirtualAddress(FrameAddr);
648
    // Currently intermixed frame from different binaries is not supported.
649
    if (!Binary->addressIsCode(FrameAddr)) {
650
      if (CallStack.empty())
651
        NumLeafExternalFrame++;
652
      // Push a special value(ExternalAddr) for the external frames so that
653
      // unwinder can still work on this with artificial Call/Return branch.
654
      // After unwinding, the context will be truncated for external frame.
655
      // Also deduplicate the consecutive external addresses.
656
      if (CallStack.empty() || CallStack.back() != ExternalAddr)
657
        CallStack.emplace_back(ExternalAddr);
658
      continue;
659
    }
660

661
    // We need to translate return address to call address for non-leaf frames.
662
    if (!CallStack.empty()) {
663
      auto CallAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
664
      if (!CallAddr) {
665
        // Stop at an invalid return address caused by bad unwinding. This could
666
        // happen to frame-pointer-based unwinding and the callee functions that
667
        // do not have the frame pointer chain set up.
668
        InvalidReturnAddresses.insert(FrameAddr);
669
        break;
670
      }
671
      FrameAddr = CallAddr;
672
    }
673

674
    CallStack.emplace_back(FrameAddr);
675
  }
676

677
  // Strip out the bottom external addr.
678
  if (CallStack.size() > 1 && CallStack.back() == ExternalAddr)
679
    CallStack.pop_back();
680

681
  // Skip other unrelated line, find the next valid LBR line
682
  // Note that even for empty call stack, we should skip the address at the
683
  // bottom, otherwise the following pass may generate a truncated callstack
684
  while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with(" 0x")) {
685
    TraceIt.advance();
686
  }
687
  // Filter out broken stack sample. We may not have complete frame info
688
  // if sample end up in prolog/epilog, the result is dangling context not
689
  // connected to entry point. This should be relatively rare thus not much
690
  // impact on overall profile quality. However we do want to filter them
691
  // out to reduce the number of different calling contexts. One instance
692
  // of such case - when sample landed in prolog/epilog, somehow stack
693
  // walking will be broken in an unexpected way that higher frames will be
694
  // missing.
695
  return !CallStack.empty() &&
696
         !Binary->addressInPrologEpilog(CallStack.front());
697
}
698

699
void PerfScriptReader::warnIfMissingMMap() {
700
  if (!Binary->getMissingMMapWarned() && !Binary->getIsLoadedByMMap()) {
701
    WithColor::warning() << "No relevant mmap event is matched for "
702
                         << Binary->getName()
703
                         << ", will use preferred address ("
704
                         << format("0x%" PRIx64,
705
                                   Binary->getPreferredBaseAddress())
706
                         << ") as the base loading address!\n";
707
    // Avoid redundant warning, only warn at the first unmatched sample.
708
    Binary->setMissingMMapWarned(true);
709
  }
710
}
711

712
// Parse one hybrid sample and add it, weighted by `Count`, to
// AggregatedSamples.
//
// The raw hybrid sample starts with the call stack in FILO order and is
// followed immediately by the LBR sample, e.g.
//            4005dc    # call stack leaf
//            400634
//            400684    # call stack root
// 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
//          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
//
// A sample whose call stack is rejected is skipped together with its LBR
// line; a sample whose LBR line yields no entries is dropped. A call stack
// that is not followed by an LBR line is a fatal error.
void HybridPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
  std::shared_ptr<PerfSample> Sample = std::make_shared<PerfSample>();
#ifndef NDEBUG
  Sample->Linenum = TraceIt.getLineNumber();
#endif

  // Parse the call stack and populate PerfSample.CallStack.
  if (!extractCallstack(TraceIt, Sample->CallStack)) {
    // Skip the LBR line that belongs to the rejected call stack.
    if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().starts_with(" 0x"))
      TraceIt.advance();
    return;
  }

  warnIfMissingMMap();

  // The LBR sample is encoded in a single line after the stack sample.
  if (TraceIt.isAtEoF() || !TraceIt.getCurrentLine().starts_with(" 0x")) {
    exitWithError("Hybrid perf sample is corrupted, No LBR sample line");
    return;
  }

  // Parse the LBR stack and populate PerfSample.LBRStack.
  if (!extractLBRStack(TraceIt, Sample->LBRStack))
    return;

  if (IgnoreStackSamples) {
    Sample->CallStack.clear();
  } else {
    // Canonicalize the stack leaf to avoid a 'random' IP from the leaf frame
    // skewing LBR ranges. The call stack is non-empty here because
    // extractCallstack() succeeded.
    Sample->CallStack.front() = Sample->LBRStack[0].Target;
  }

  // Record samples by aggregation.
  AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
}
754

755
// Open \p Filename as a text output stream and write the unsymbolized profile
// into it. A failure to open the stream is reported through exitWithError.
void PerfScriptReader::writeUnsymbolizedProfile(StringRef Filename) {
  std::error_code OpenError;
  raw_fd_ostream Out(Filename, OpenError, llvm::sys::fs::OF_TextWithCRLF);
  if (OpenError)
    exitWithError(OpenError, Filename);
  writeUnsymbolizedProfile(Out);
}
762

763
// Use ordered map to make the output deterministic
764
using OrderedCounterForPrint = std::map<std::string, SampleCounter *>;
765

766
void PerfScriptReader::writeUnsymbolizedProfile(raw_fd_ostream &OS) {
767
  OrderedCounterForPrint OrderedCounters;
768
  for (auto &CI : SampleCounters) {
769
    OrderedCounters[getContextKeyStr(CI.first.getPtr(), Binary)] = &CI.second;
770
  }
771

772
  auto SCounterPrinter = [&](RangeSample &Counter, StringRef Separator,
773
                             uint32_t Indent) {
774
    OS.indent(Indent);
775
    OS << Counter.size() << "\n";
776
    for (auto &I : Counter) {
777
      uint64_t Start = I.first.first;
778
      uint64_t End = I.first.second;
779

780
      if (UseOffset) {
781
        if (UseLoadableSegmentAsBase) {
782
          Start -= Binary->getFirstLoadableAddress();
783
          End -= Binary->getFirstLoadableAddress();
784
        } else {
785
          Start -= Binary->getPreferredBaseAddress();
786
          End -= Binary->getPreferredBaseAddress();
787
        }
788
      }
789

790
      OS.indent(Indent);
791
      OS << Twine::utohexstr(Start) << Separator << Twine::utohexstr(End) << ":"
792
         << I.second << "\n";
793
    }
794
  };
795

796
  for (auto &CI : OrderedCounters) {
797
    uint32_t Indent = 0;
798
    if (ProfileIsCS) {
799
      // Context string key
800
      OS << "[" << CI.first << "]\n";
801
      Indent = 2;
802
    }
803

804
    SampleCounter &Counter = *CI.second;
805
    SCounterPrinter(Counter.RangeCounter, "-", Indent);
806
    SCounterPrinter(Counter.BranchCounter, "->", Indent);
807
  }
808
}
809

810
// Format of input:
811
// number of entries in RangeCounter
812
// from_1-to_1:count_1
813
// from_2-to_2:count_2
814
// ......
815
// from_n-to_n:count_n
816
// number of entries in BranchCounter
817
// src_1->dst_1:count_1
818
// src_2->dst_2:count_2
819
// ......
820
// src_n->dst_n:count_n
821
void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt,
822
                                                   SampleCounter &SCounters) {
823
  auto exitWithErrorForTraceLine = [](TraceStream &TraceIt) {
824
    std::string Msg = TraceIt.isAtEoF()
825
                          ? "Invalid raw profile!"
826
                          : "Invalid raw profile at line " +
827
                                Twine(TraceIt.getLineNumber()).str() + ": " +
828
                                TraceIt.getCurrentLine().str();
829
    exitWithError(Msg);
830
  };
831
  auto ReadNumber = [&](uint64_t &Num) {
832
    if (TraceIt.isAtEoF())
833
      exitWithErrorForTraceLine(TraceIt);
834
    if (TraceIt.getCurrentLine().ltrim().getAsInteger(10, Num))
835
      exitWithErrorForTraceLine(TraceIt);
836
    TraceIt.advance();
837
  };
838

839
  auto ReadCounter = [&](RangeSample &Counter, StringRef Separator) {
840
    uint64_t Num = 0;
841
    ReadNumber(Num);
842
    while (Num--) {
843
      if (TraceIt.isAtEoF())
844
        exitWithErrorForTraceLine(TraceIt);
845
      StringRef Line = TraceIt.getCurrentLine().ltrim();
846

847
      uint64_t Count = 0;
848
      auto LineSplit = Line.split(":");
849
      if (LineSplit.second.empty() || LineSplit.second.getAsInteger(10, Count))
850
        exitWithErrorForTraceLine(TraceIt);
851

852
      uint64_t Source = 0;
853
      uint64_t Target = 0;
854
      auto Range = LineSplit.first.split(Separator);
855
      if (Range.second.empty() || Range.first.getAsInteger(16, Source) ||
856
          Range.second.getAsInteger(16, Target))
857
        exitWithErrorForTraceLine(TraceIt);
858

859
      if (UseOffset) {
860
        if (UseLoadableSegmentAsBase) {
861
          Source += Binary->getFirstLoadableAddress();
862
          Target += Binary->getFirstLoadableAddress();
863
        } else {
864
          Source += Binary->getPreferredBaseAddress();
865
          Target += Binary->getPreferredBaseAddress();
866
        }
867
      }
868

869
      Counter[{Source, Target}] += Count;
870
      TraceIt.advance();
871
    }
872
  };
873

874
  ReadCounter(SCounters.RangeCounter, "-");
875
  ReadCounter(SCounters.BranchCounter, "->");
876
}
877

878
// Read an unsymbolized profile from \p FileName: a sequence of records, each
// being an optional "[context]" header line followed by the sample counters.
void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) {
  for (TraceStream TraceIt(FileName); !TraceIt.isAtEoF();) {
    auto Key = std::make_shared<StringBasedCtxKey>();
    StringRef Header = TraceIt.getCurrentLine();
    // A line starting with '[' carries the context stack of a CS profile.
    if (Header.starts_with("[")) {
      ProfileIsCS = true;
      // The context vector is created from the string stored in
      // ContextStrSet rather than from the current trace line.
      auto Inserted = ContextStrSet.insert(Header.str());
      SampleContext::createCtxVectorFromStr(*Inserted.first, Key->Context);
      TraceIt.advance();
    }
    SampleCounter &Counters =
        SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter())
            .first->second;
    readSampleCounters(TraceIt, Counters);
  }
}
896

897
void UnsymbolizedProfileReader::parsePerfTraces() {
898
  readUnsymbolizedProfile(PerfTraceFile);
899
}
900

901
void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
902
                                             uint64_t Repeat) {
903
  SampleCounter &Counter = SampleCounters.begin()->second;
904
  uint64_t EndAddress = 0;
905
  for (const LBREntry &LBR : Sample->LBRStack) {
906
    uint64_t SourceAddress = LBR.Source;
907
    uint64_t TargetAddress = LBR.Target;
908

909
    // Record the branch if its SourceAddress is external. It can be the case an
910
    // external source call an internal function, later this branch will be used
911
    // to generate the function's head sample.
912
    if (Binary->addressIsCode(TargetAddress)) {
913
      Counter.recordBranchCount(SourceAddress, TargetAddress, Repeat);
914
    }
915

916
    // If this not the first LBR, update the range count between TO of current
917
    // LBR and FROM of next LBR.
918
    uint64_t StartAddress = TargetAddress;
919
    if (Binary->addressIsCode(StartAddress) &&
920
        Binary->addressIsCode(EndAddress) &&
921
        isValidFallThroughRange(StartAddress, EndAddress, Binary))
922
      Counter.recordRangeCount(StartAddress, EndAddress, Repeat);
923
    EndAddress = SourceAddress;
924
  }
925
}
926

927
// Parse one LBR-only sample and fold it into AggregatedSamples with weight
// \p Count. A sample whose LBR stack cannot be extracted is dropped.
void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) {
  auto Sample = std::make_shared<PerfSample>();
  // Populate PerfSample.LBRStack from the current trace position.
  if (!extractLBRStack(TraceIt, Sample->LBRStack))
    return;

  warnIfMissingMMap();
  // Identical LBR-only samples are aggregated and their counts summed up.
  AggregatedSamples[Hashable<PerfSample>(Sample)] += Count;
}
936

937
void PerfScriptReader::generateUnsymbolizedProfile() {
938
  // There is no context for LBR only sample, so initialize one entry with
939
  // fake "empty" context key.
940
  assert(SampleCounters.empty() &&
941
         "Sample counter map should be empty before raw profile generation");
942
  std::shared_ptr<StringBasedCtxKey> Key =
943
      std::make_shared<StringBasedCtxKey>();
944
  SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
945
  for (const auto &Item : AggregatedSamples) {
946
    const PerfSample *Sample = Item.first.getPtr();
947
    computeCounterFromLBR(Sample, Item.second);
948
  }
949
}
950

951
// Read the optional aggregated count that may precede a sample. When the
// current line is a plain decimal number it is consumed and returned;
// otherwise the line is left in place and the count defaults to 1.
uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) {
  uint64_t Count = 1;
  // getAsInteger returns true on failure.
  const bool NotANumber = TraceIt.getCurrentLine().getAsInteger(10, Count);
  if (NotANumber)
    return Count;
  TraceIt.advance();
  return Count;
}
959

960
// Parse one sample: bump the total sample counter, read the optional
// aggregated count and hand over to the two-argument parseSample overload.
void PerfScriptReader::parseSample(TraceStream &TraceIt) {
  ++NumTotalSample;
  const uint64_t Count = parseAggregatedCount(TraceIt);
  assert(Count >= 1 && "Aggregated count should be >= 1!");
  parseSample(TraceIt, Count);
}
966

967
bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
968
                                                 StringRef Line,
969
                                                 MMapEvent &MMap) {
970
  // Parse a MMap2 line like:
971
  //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
972
  //  08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
973
  constexpr static const char *const MMap2Pattern =
974
      "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
975
      "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
976
      "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
977
  // Parse a MMap line like
978
  // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
979
  //  0xffffffff81e00000]: x [kernel.kallsyms]_text
980
  constexpr static const char *const MMapPattern =
981
      "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
982
      "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
983
      "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)";
984
  // Field 0 - whole line
985
  // Field 1 - PID
986
  // Field 2 - base address
987
  // Field 3 - mmapped size
988
  // Field 4 - page offset
989
  // Field 5 - binary path
990
  enum EventIndex {
991
    WHOLE_LINE = 0,
992
    PID = 1,
993
    MMAPPED_ADDRESS = 2,
994
    MMAPPED_SIZE = 3,
995
    PAGE_OFFSET = 4,
996
    BINARY_PATH = 5
997
  };
998

999
  bool R = false;
1000
  SmallVector<StringRef, 6> Fields;
1001
  if (Line.contains("PERF_RECORD_MMAP2 ")) {
1002
    Regex RegMmap2(MMap2Pattern);
1003
    R = RegMmap2.match(Line, &Fields);
1004
  } else if (Line.contains("PERF_RECORD_MMAP ")) {
1005
    Regex RegMmap(MMapPattern);
1006
    R = RegMmap.match(Line, &Fields);
1007
  } else
1008
    llvm_unreachable("unexpected MMAP event entry");
1009

1010
  if (!R) {
1011
    std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n";
1012
    WithColor::warning() << WarningMsg;
1013
    return false;
1014
  }
1015
  long long MMapPID = 0;
1016
  getAsSignedInteger(Fields[PID], 10, MMapPID);
1017
  MMap.PID = MMapPID;
1018
  Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
1019
  Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
1020
  Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
1021
  MMap.BinaryPath = Fields[BINARY_PATH];
1022
  if (ShowMmapEvents) {
1023
    outs() << "Mmap: Binary " << MMap.BinaryPath << " loaded at "
1024
           << format("0x%" PRIx64 ":", MMap.Address) << " \n";
1025
  }
1026

1027
  StringRef BinaryName = filename(MMap.BinaryPath, Binary->isCOFF());
1028
  if (Binary->isKernel()) {
1029
    return Binary->isKernelImageName(BinaryName);
1030
  }
1031
  return Binary->getName() == BinaryName;
1032
}
1033

1034
// Handle the mmap event on the current line: when it refers to the profiled
// binary, update the binary's address information. The line is consumed in
// either case.
void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) {
  MMapEvent Event;
  const bool IsForBinary =
      extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), Event);
  if (IsForBinary)
    updateBinaryAddress(Event);
  TraceIt.advance();
}
1040

1041
// Dispatch on the current line: mmap events go to parseMMapEvent, everything
// else is treated as a sample.
void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
  if (!isMMapEvent(TraceIt.getCurrentLine())) {
    parseSample(TraceIt);
    return;
  }
  parseMMapEvent(TraceIt);
}
1047

1048
void PerfScriptReader::parseAndAggregateTrace() {
1049
  // Trace line iterator
1050
  TraceStream TraceIt(PerfTraceFile);
1051
  while (!TraceIt.isAtEoF())
1052
    parseEventOrSample(TraceIt);
1053
}
1054

1055
// A LBR sample is like:
1056
// 40062f 0x5c6313f/0x5c63170/P/-/-/0  0x5c630e7/0x5c63130/P/-/-/0 ...
1057
// A heuristic for fast detection by checking whether a
1058
// leading "  0x" and the '/' exist.
1059
bool PerfScriptReader::isLBRSample(StringRef Line) {
1060
  // Skip the leading instruction pointer
1061
  SmallVector<StringRef, 32> Records;
1062
  Line.trim().split(Records, " ", 2, false);
1063
  if (Records.size() < 2)
1064
    return false;
1065
  if (Records[1].starts_with("0x") && Records[1].contains('/'))
1066
    return true;
1067
  return false;
1068
}
1069

1070
// Quick test whether \p Line may be an mmap event line.
//
// Returns true when the line is at least 50 characters long, does not start
// with a decimal digit and contains "PERF_RECORD_MMAP" (which also covers
// PERF_RECORD_MMAP2).
bool PerfScriptReader::isMMapEvent(StringRef Line) {
  // Shortcuts that avoid the substring search where possible. The length
  // check also rejects empty lines, so indexing Line[0] below is safe.
  if (Line.size() < 50)
    return false;

  // std::isdigit has undefined behaviour for negative arguments other than
  // EOF, so the (possibly signed) char must be widened via unsigned char.
  if (std::isdigit(static_cast<unsigned char>(Line[0])))
    return false;

  // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of
  // the line for ` perf script  --show-mmap-events  -i ...`
  return Line.contains("PERF_RECORD_MMAP");
}
1082

1083
// The raw hybird sample is like
1084
// e.g.
1085
// 	          4005dc    # call stack leaf
1086
//	          400634
1087
//	          400684    # call stack root
1088
// 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
1089
//          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
1090
// Determine the perfscript contains hybrid samples(call stack + LBRs) by
1091
// checking whether there is a non-empty call stack immediately followed by
1092
// a LBR sample
1093
PerfContent PerfScriptReader::checkPerfScriptType(StringRef FileName) {
1094
  TraceStream TraceIt(FileName);
1095
  uint64_t FrameAddr = 0;
1096
  while (!TraceIt.isAtEoF()) {
1097
    // Skip the aggregated count
1098
    if (!TraceIt.getCurrentLine().getAsInteger(10, FrameAddr))
1099
      TraceIt.advance();
1100

1101
    // Detect sample with call stack
1102
    int32_t Count = 0;
1103
    while (!TraceIt.isAtEoF() &&
1104
           !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {
1105
      Count++;
1106
      TraceIt.advance();
1107
    }
1108
    if (!TraceIt.isAtEoF()) {
1109
      if (isLBRSample(TraceIt.getCurrentLine())) {
1110
        if (Count > 0)
1111
          return PerfContent::LBRStack;
1112
        else
1113
          return PerfContent::LBR;
1114
      }
1115
      TraceIt.advance();
1116
    }
1117
  }
1118

1119
  exitWithError("Invalid perf script input!");
1120
  return PerfContent::UnknownContent;
1121
}
1122

1123
void HybridPerfReader::generateUnsymbolizedProfile() {
1124
  ProfileIsCS = !IgnoreStackSamples;
1125
  if (ProfileIsCS)
1126
    unwindSamples();
1127
  else
1128
    PerfScriptReader::generateUnsymbolizedProfile();
1129
}
1130

1131
void PerfScriptReader::warnTruncatedStack() {
1132
  if (ShowDetailedWarning) {
1133
    for (auto Address : InvalidReturnAddresses) {
1134
      WithColor::warning()
1135
          << "Truncated stack sample due to invalid return address at "
1136
          << format("0x%" PRIx64, Address)
1137
          << ", likely caused by frame pointer omission\n";
1138
    }
1139
  }
1140
  emitWarningSummary(
1141
      InvalidReturnAddresses.size(), AggregatedSamples.size(),
1142
      "of truncated stack samples due to invalid return address, "
1143
      "likely caused by frame pointer omission.");
1144
}
1145

1146
void PerfScriptReader::warnInvalidRange() {
1147
  std::unordered_map<std::pair<uint64_t, uint64_t>, uint64_t,
1148
                     pair_hash<uint64_t, uint64_t>>
1149
      Ranges;
1150

1151
  for (const auto &Item : AggregatedSamples) {
1152
    const PerfSample *Sample = Item.first.getPtr();
1153
    uint64_t Count = Item.second;
1154
    uint64_t EndAddress = 0;
1155
    for (const LBREntry &LBR : Sample->LBRStack) {
1156
      uint64_t SourceAddress = LBR.Source;
1157
      uint64_t StartAddress = LBR.Target;
1158
      if (EndAddress != 0)
1159
        Ranges[{StartAddress, EndAddress}] += Count;
1160
      EndAddress = SourceAddress;
1161
    }
1162
  }
1163

1164
  if (Ranges.empty()) {
1165
    WithColor::warning() << "No samples in perf script!\n";
1166
    return;
1167
  }
1168

1169
  auto WarnInvalidRange = [&](uint64_t StartAddress, uint64_t EndAddress,
1170
                              StringRef Msg) {
1171
    if (!ShowDetailedWarning)
1172
      return;
1173
    WithColor::warning() << "[" << format("%8" PRIx64, StartAddress) << ","
1174
                         << format("%8" PRIx64, EndAddress) << "]: " << Msg
1175
                         << "\n";
1176
  };
1177

1178
  const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
1179
                                  "likely due to profile and binary mismatch.";
1180
  const char *DanglingRangeMsg = "Range does not belong to any functions, "
1181
                                 "likely from PLT, .init or .fini section.";
1182
  const char *RangeCrossFuncMsg =
1183
      "Fall through range should not cross function boundaries, likely due to "
1184
      "profile and binary mismatch.";
1185
  const char *BogusRangeMsg = "Range start is after or too far from range end.";
1186

1187
  uint64_t TotalRangeNum = 0;
1188
  uint64_t InstNotBoundary = 0;
1189
  uint64_t UnmatchedRange = 0;
1190
  uint64_t RangeCrossFunc = 0;
1191
  uint64_t BogusRange = 0;
1192

1193
  for (auto &I : Ranges) {
1194
    uint64_t StartAddress = I.first.first;
1195
    uint64_t EndAddress = I.first.second;
1196
    TotalRangeNum += I.second;
1197

1198
    if (!Binary->addressIsCode(StartAddress) &&
1199
        !Binary->addressIsCode(EndAddress))
1200
      continue;
1201

1202
    if (!Binary->addressIsCode(StartAddress) ||
1203
        !Binary->addressIsTransfer(EndAddress)) {
1204
      InstNotBoundary += I.second;
1205
      WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg);
1206
    }
1207

1208
    auto *FRange = Binary->findFuncRange(StartAddress);
1209
    if (!FRange) {
1210
      UnmatchedRange += I.second;
1211
      WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
1212
      continue;
1213
    }
1214

1215
    if (EndAddress >= FRange->EndAddress) {
1216
      RangeCrossFunc += I.second;
1217
      WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg);
1218
    }
1219

1220
    if (Binary->addressIsCode(StartAddress) &&
1221
        Binary->addressIsCode(EndAddress) &&
1222
        !isValidFallThroughRange(StartAddress, EndAddress, Binary)) {
1223
      BogusRange += I.second;
1224
      WarnInvalidRange(StartAddress, EndAddress, BogusRangeMsg);
1225
    }
1226
  }
1227

1228
  emitWarningSummary(
1229
      InstNotBoundary, TotalRangeNum,
1230
      "of samples are from ranges that are not on instruction boundary.");
1231
  emitWarningSummary(
1232
      UnmatchedRange, TotalRangeNum,
1233
      "of samples are from ranges that do not belong to any functions.");
1234
  emitWarningSummary(
1235
      RangeCrossFunc, TotalRangeNum,
1236
      "of samples are from ranges that do cross function boundaries.");
1237
  emitWarningSummary(
1238
      BogusRange, TotalRangeNum,
1239
      "of samples are from ranges that have range start after or too far from "
1240
      "range end acrossing the unconditinal jmp.");
1241
}
1242

1243
void PerfScriptReader::parsePerfTraces() {
1244
  // Parse perf traces and do aggregation.
1245
  parseAndAggregateTrace();
1246
  if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) {
1247
    exitWithError(
1248
        "Kernel is requested, but no kernel is found in mmap events.");
1249
  }
1250

1251
  emitWarningSummary(NumLeafExternalFrame, NumTotalSample,
1252
                     "of samples have leaf external frame in call stack.");
1253
  emitWarningSummary(NumLeadingOutgoingLBR, NumTotalSample,
1254
                     "of samples have leading external LBR.");
1255

1256
  // Generate unsymbolized profile.
1257
  warnTruncatedStack();
1258
  warnInvalidRange();
1259
  generateUnsymbolizedProfile();
1260
  AggregatedSamples.clear();
1261

1262
  if (SkipSymbolization)
1263
    writeUnsymbolizedProfile(OutputFilename);
1264
}
1265

1266
// Out-of-class definition of the PerfScriptReader::TempFileCleanups member;
// the inline capacity of 2 is part of its declared type.
SmallVector<CleanupInstaller, 2> PerfScriptReader::TempFileCleanups;
1267

1268
} // end namespace sampleprof
1269
} // end namespace llvm
1270

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.