2
#include <gtest/gtest.h>
4
#include <torch/csrc/profiler/events.h>
5
#include <torch/csrc/profiler/perf.h>
8
// Busy-work loop: starting from 1.0, accumulates the alternating series
// 1 - 1/3 + 1/5 - 1/7 + ... over the odd integers below 100000 (the Leibniz
// series, which converges to pi/4).
// NOTE(review): the enclosing function's signature, return statement and
// closing braces are not visible in this view.
// `volatile` presumably prevents the compiler from optimizing the loop away,
// so that it produces real work to measure -- TODO confirm.
volatile double pi = 1.0;
9
for (int i = 3; i < 100000; i += 2) {
10
// ((i + 1) >> 1) % 2 is 0 for i = 3, 7, 11, ... (term subtracted) and 1 for
// i = 5, 9, 13, ... (term added), which yields the alternating signs.
pi += (((i + 1) >> 1) % 2) ? 1.0 / i : -1.0 / i;
15
// Basic PerfProfiler test: configures the profiler with every event listed in
// torch::profiler::ProfilerPerfEvents, collects one set of counters via
// Disable(), and checks the collected values per platform.
// NOTE(review): parts of this test are not visible in this view (the `try {`
// matching the catch below, the code between Configure() and Disable(), the
// catch body, `#else`/`#endif`, and the closing braces).
TEST(ProfilerTest, LinuxPerf) {
16
torch::profiler::impl::linux_perf::PerfProfiler profiler;
18
// Copy the full ProfilerPerfEvents list into a vector of event names.
std::vector<std::string> standard_events(
19
std::begin(torch::profiler::ProfilerPerfEvents),
20
std::end(torch::profiler::ProfilerPerfEvents));
21
torch::profiler::perf_counters_t counters;
22
// One slot per requested event, each initialized to 0.
counters.resize(standard_events.size(), 0);
27
profiler.Configure(standard_events);
31
// Presumably stops counting and stores the measured values into `counters`
// -- verify against PerfProfiler::Disable.
profiler.Disable(counters);
32
// c10::Error gets its own handler; what the handler does is not visible here.
} catch (const c10::Error&) {
43
#if defined(__ANDROID__) || defined(__linux__)
44
// On Android/Linux builds every counter must be strictly positive.
for (auto counter : counters) {
45
ASSERT_GT(counter, 0);
48
// Presumably the branch for other platforms (the `#else` is not visible
// here): every counter must still hold the 0 it was resized with.
for (auto counter : counters) {
49
ASSERT_EQ(counter, 0);
54
// Nested-measurement test: three counter sets (A, B, C) are collected with
// Disable() in the order C, B, A, and the results are compared against each
// other.
// NOTE(review): parts of this test are not visible in this view (the `try {`
// matching the catch below, whatever runs between Configure() and the
// Disable() calls, the catch body, `#else`/`#endif`, and the closing braces).
TEST(ProfilerTest, LinuxPerfNestedDepth) {
55
torch::profiler::impl::linux_perf::PerfProfiler profiler;
58
// Copy the full ProfilerPerfEvents list into a vector of event names.
std::vector<std::string> standard_events(
59
std::begin(torch::profiler::ProfilerPerfEvents),
60
std::end(torch::profiler::ProfilerPerfEvents));
62
torch::profiler::perf_counters_t counters_A;
63
torch::profiler::perf_counters_t counters_B;
64
torch::profiler::perf_counters_t counters_C;
66
// Each counter set gets one zero-initialized slot per requested event.
counters_A.resize(standard_events.size(), 0);
67
counters_B.resize(standard_events.size(), 0);
68
counters_C.resize(standard_events.size(), 0);
73
profiler.Configure(standard_events);
94
// Counter sets are collected innermost-first: C, then B, then A.
// Presumably each Disable() stores the values measured for its region --
// verify against PerfProfiler::Disable.
profiler.Disable(counters_C);
97
profiler.Disable(counters_B);
100
profiler.Disable(counters_A);
101
// c10::Error gets its own handler; what the handler does is not visible here.
} catch (const c10::Error&) {
111
#if defined(__ANDROID__) || defined(__linux__)
112
// On Android/Linux, for every event: A > B > C strictly, and A must also
// exceed the sum of B and C.
// NOTE(review): `auto i = 0` deduces `int` while `standard_events.size()` is
// unsigned, so the loop condition mixes signedness.
for (auto i = 0; i < standard_events.size(); ++i) {
113
ASSERT_GT(counters_A[i], counters_B[i]);
114
ASSERT_GT(counters_A[i], counters_C[i]);
115
ASSERT_GT(counters_B[i], counters_C[i]);
116
ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
119
// Presumably the branch for other platforms (the `#else` is not visible
// here): all three counter sets must still hold their initial zeros.
for (auto i = 0; i < standard_events.size(); ++i) {
120
ASSERT_EQ(counters_A[i], 0);
121
ASSERT_EQ(counters_B[i], 0);
122
ASSERT_EQ(counters_C[i], 0);
127
// Nested-measurement test: three counter sets (A, B, C) are collected with
// Disable() in the order B, C, A, and the results are compared against each
// other.
// NOTE(review): parts of this test are not visible in this view (the `try {`
// matching the catch below, whatever runs between Configure() and the
// Disable() calls, the catch body, `#else`/`#endif`, and the closing braces).
TEST(ProfilerTest, LinuxPerfNestedMultiple) {
128
torch::profiler::impl::linux_perf::PerfProfiler profiler;
131
// Copy the full ProfilerPerfEvents list into a vector of event names.
std::vector<std::string> standard_events(
132
std::begin(torch::profiler::ProfilerPerfEvents),
133
std::end(torch::profiler::ProfilerPerfEvents));
135
torch::profiler::perf_counters_t counters_A;
136
torch::profiler::perf_counters_t counters_B;
137
torch::profiler::perf_counters_t counters_C;
139
// Each counter set gets one zero-initialized slot per requested event.
counters_A.resize(standard_events.size(), 0);
140
counters_B.resize(standard_events.size(), 0);
141
counters_C.resize(standard_events.size(), 0);
146
profiler.Configure(standard_events);
161
// B is collected first, then C, and A last.
// Presumably each Disable() stores the values measured for its region --
// verify against PerfProfiler::Disable.
profiler.Disable(counters_B);
167
profiler.Disable(counters_C);
170
profiler.Disable(counters_A);
171
// c10::Error gets its own handler; what the handler does is not visible here.
} catch (const c10::Error&) {
181
#if defined(__ANDROID__) || defined(__linux__)
182
// On Android/Linux, for every event: A > B > C strictly, and A must also
// exceed the sum of B and C.
// NOTE(review): `auto i = 0` deduces `int` while `standard_events.size()` is
// unsigned, so the loop condition mixes signedness.
for (auto i = 0; i < standard_events.size(); ++i) {
183
ASSERT_GT(counters_A[i], counters_B[i]);
184
ASSERT_GT(counters_A[i], counters_C[i]);
185
ASSERT_GT(counters_B[i], counters_C[i]);
186
ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
189
// Presumably the branch for other platforms (the `#else` is not visible
// here): all three counter sets must still hold their initial zeros.
for (auto i = 0; i < standard_events.size(); ++i) {
190
ASSERT_EQ(counters_A[i], 0);
191
ASSERT_EQ(counters_B[i], 0);
192
ASSERT_EQ(counters_C[i], 0);
197
// Nested-measurement test: three counter sets (A, B, C) are collected with
// Disable() in the order C, B, A, and checked with non-strict (>=)
// comparisons rather than strict ones.
// NOTE(review): parts of this test are not visible in this view (the `try {`
// matching the catch below, whatever runs between Configure() and the
// Disable() calls, the catch body, `#else`/`#endif`, and the closing braces).
TEST(ProfilerTest, LinuxPerfNestedSingle) {
198
torch::profiler::impl::linux_perf::PerfProfiler profiler;
201
// Copy the full ProfilerPerfEvents list into a vector of event names.
std::vector<std::string> standard_events(
202
std::begin(torch::profiler::ProfilerPerfEvents),
203
std::end(torch::profiler::ProfilerPerfEvents));
205
torch::profiler::perf_counters_t counters_A;
206
torch::profiler::perf_counters_t counters_B;
207
torch::profiler::perf_counters_t counters_C;
209
// Each counter set gets one zero-initialized slot per requested event.
counters_A.resize(standard_events.size(), 0);
210
counters_B.resize(standard_events.size(), 0);
211
counters_C.resize(standard_events.size(), 0);
216
profiler.Configure(standard_events);
222
// Counter sets are collected innermost-first: C, then B, then A.
// Presumably each Disable() stores the values measured for its region --
// verify against PerfProfiler::Disable.
profiler.Disable(counters_C);
223
profiler.Disable(counters_B);
224
profiler.Disable(counters_A);
225
// c10::Error gets its own handler; what the handler does is not visible here.
} catch (const c10::Error&) {
235
#if defined(__ANDROID__) || defined(__linux__)
236
// On Android/Linux, for every event: A >= B >= C. Equality is allowed here.
// NOTE(review): `auto i = 0` deduces `int` while `standard_events.size()` is
// unsigned, so the loop condition mixes signedness.
for (auto i = 0; i < standard_events.size(); ++i) {
237
ASSERT_GE(counters_A[i], counters_B[i]);
238
ASSERT_GE(counters_A[i], counters_C[i]);
239
ASSERT_GE(counters_B[i], counters_C[i]);
242
// Presumably the branch for other platforms (the `#else` is not visible
// here): all three counter sets must still hold their initial zeros.
for (auto i = 0; i < standard_events.size(); ++i) {
243
ASSERT_EQ(counters_A[i], 0);
244
ASSERT_EQ(counters_B[i], 0);
245
ASSERT_EQ(counters_C[i], 0);