#include <torch/csrc/jit/mobile/profiler_edge.h>

#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/overloaded.h>

#include <string>
#include <vector>
12
thread_local KinetoEdgeCPUProfiler* tls_edge_profiler{nullptr};
14
KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
15
const torch::jit::mobile::Module& m,
16
const std::string& fname,
17
const bool report_input_shapes,
18
const bool profile_memory,
19
const bool with_stack,
20
const bool with_flops,
21
const bool with_modules,
22
std::vector<std::string> events,
23
const bool adjust_vulkan_timestamps)
24
: m_(m), trace_file_name_(fname) {
25
torch::profiler::impl::ExperimentalConfig experimental_config;
28
experimental_config.performance_events = std::move(events);
32
experimental_config.adjust_timestamps = adjust_vulkan_timestamps;
34
torch::profiler::impl::ProfilerConfig config(
35
torch::profiler::impl::ProfilerState::KINETO,
42
torch::autograd::profiler::prepareProfiler(
43
config, {torch::autograd::profiler::ActivityType::CPU});
44
if (with_modules || with_stack) {
45
auto post_processing = [this, with_stack, with_modules](
47
std::vector<std::string>& jit_stack,
48
std::vector<std::string>& jit_modules) {
49
std::string no_debug_info("Model was not saved with debug information");
53
jit_modules = std::vector<std::string>(
54
{this->m_.hasDebugHandles()
55
? this->m_.getModuleHierarchy(debug_handle)
57
} else if (with_stack) {
60
jit_stack = std::vector<std::string>(
61
{this->m_.hasDebugHandles() ? this->m_.getCallStack(debug_handle)
65
torch::autograd::profiler::enableProfilerWithEventPostProcess(
67
{torch::autograd::profiler::ActivityType::CPU},
69
{at::RecordScope::LITE_INTERPRETER});
71
torch::autograd::profiler::enableProfiler(
73
{torch::autograd::profiler::ActivityType::CPU},
74
{at::RecordScope::LITE_INTERPRETER});
76
trace_file_name_ = fname;
78
tls_edge_profiler == nullptr, "Edge profiler is already profiling.")
79
tls_edge_profiler = this;
82
void KinetoEdgeCPUProfiler::recordBackendMemoryEvent(
85
size_t total_allocated,
86
size_t total_reserved,
88
c10::reportMemoryUsageToProfiler(
89
ptr, alloc_size, total_allocated, total_reserved, device);
92
void KinetoEdgeCPUProfiler::recordBackendEvent(
93
const int64_t start_time_us,
94
const int64_t end_time_us,
95
const int64_t debug_handle,
96
const std::string& event_name,
97
const std::string& backend_name) {
98
torch::autograd::profiler::reportBackendEventToActiveKinetoProfiler(
102
at::RecordScope::LITE_INTERPRETER,
107
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
108
KinetoEdgeCPUProfiler::disableProfiler() {
111
"KinetoEdgeCPUProfiler already disabled. "
112
"To get list of events use getProfilerResults()");
113
profiler_result_ = torch::autograd::profiler::disableProfiler();
114
return profiler_result_;
117
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
118
KinetoEdgeCPUProfiler::getProfilerResult() {
121
"KinetoEdgeCPUProfiler has not been disabled. "
122
"use disableProfiler() API first, which returns the ProfilerResult.");
123
return profiler_result_;
126
KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
127
if (!trace_file_name_.empty()) {
128
if (profiler_result_) {
129
profiler_result_->save(trace_file_name_);
131
torch::autograd::profiler::disableProfiler()->save(trace_file_name_);
134
tls_edge_profiler = nullptr;
137
KinetoEdgeCPUProfiler* getCurrentEdgeProfiler() {
138
return tls_edge_profiler;