#include <torch/csrc/jit/mobile/profiler_edge.h>

#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/overloaded.h>

#include <string>
#include <vector>
12
thread_local KinetoEdgeCPUProfiler* tls_edge_profiler{nullptr};
14
KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
15
const torch::jit::mobile::Module& m,
16
const std::string& fname,
17
const bool report_input_shapes,
18
const bool profile_memory,
19
const bool with_stack,
20
const bool with_flops,
21
const bool with_modules,
22
std::vector<std::string> events,
23
const bool adjust_vulkan_timestamps)
24
: m_(m), trace_file_name_(fname) {
25
torch::profiler::impl::ExperimentalConfig experimental_config;
28
experimental_config.performance_events = std::move(events);
32
experimental_config.adjust_timestamps = adjust_vulkan_timestamps;
34
torch::profiler::impl::ProfilerConfig config(
35
torch::profiler::impl::ProfilerState::KINETO,
42
torch::autograd::profiler::prepareProfiler(
43
config, {torch::autograd::profiler::ActivityType::CPU});
44
if (with_modules || with_stack) {
45
auto post_processing = [this, with_stack, with_modules](
47
std::vector<std::string>& jit_stack,
48
std::vector<std::string>& jit_modules) {
49
std::string no_debug_info("Model was not saved with debug information");
53
jit_modules = std::vector<std::string>(
54
{this->m_.hasDebugHandles()
55
? this->m_.getModuleHierarchy(debug_handle)
57
} else if (with_stack) {
60
jit_stack = std::vector<std::string>(
61
{this->m_.hasDebugHandles() ? this->m_.getCallStack(debug_handle)
65
torch::autograd::profiler::enableProfilerWithEventPostProcess(
67
{torch::autograd::profiler::ActivityType::CPU},
69
{at::RecordScope::LITE_INTERPRETER});
71
torch::autograd::profiler::enableProfiler(
73
{torch::autograd::profiler::ActivityType::CPU},
74
{at::RecordScope::LITE_INTERPRETER});
76
trace_file_name_ = fname;
78
tls_edge_profiler == nullptr, "Edge profiler is already profiling.")
79
tls_edge_profiler = this;
82
void KinetoEdgeCPUProfiler::recordBackendMemoryEvent(
85
size_t total_allocated,
86
size_t total_reserved,
88
c10::reportMemoryUsageToProfiler(
89
ptr, alloc_size, total_allocated, total_reserved, device);
92
void KinetoEdgeCPUProfiler::recordBackendEvent(
93
const int64_t start_time_us,
94
const int64_t end_time_us,
95
const int64_t debug_handle,
96
const std::string& event_name,
97
const std::string& backend_name) {
98
torch::autograd::profiler::reportBackendEventToActiveKinetoProfiler(
102
at::RecordScope::LITE_INTERPRETER,
107
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
108
KinetoEdgeCPUProfiler::disableProfiler() {
111
"KinetoEdgeCPUProfiler already disabled. "
112
"To get list of events use getProfilerResults()");
113
profiler_result_ = torch::autograd::profiler::disableProfiler();
114
return profiler_result_;
117
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
118
KinetoEdgeCPUProfiler::getProfilerResult() {
121
"KinetoEdgeCPUProfiler has not been disabled. "
122
"use disableProfiler() API first, which returns the ProfilerResult.");
123
return profiler_result_;
126
KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
127
if (!trace_file_name_.empty()) {
128
if (profiler_result_) {
129
profiler_result_->save(trace_file_name_);
131
torch::autograd::profiler::disableProfiler()->save(trace_file_name_);
134
tls_edge_profiler = nullptr;
137
KinetoEdgeCPUProfiler* getCurrentEdgeProfiler() {
138
return tls_edge_profiler;