pytorch

perf_events.cpp
247 строк · 7.3 Кб
Перенос по словам
1

2
#include <gtest/gtest.h>

#include <cstddef>
#include <iterator>
#include <string>
#include <vector>

#include <torch/csrc/profiler/events.h>
#include <torch/csrc/profiler/perf.h>
6

7
// Work kernel for the profiler tests: approximates pi with the Leibniz series
// 4 * (1 - 1/3 + 1/5 - 1/7 + ...), using the odd denominators below 100000.
//
// Returns the approximation of pi (accurate to roughly 2e-5 for this fixed
// number of terms).
double calc_pi() {
  // NOTE(review): `volatile` presumably keeps the compiler from optimizing
  // the loop away so that the perf counters observe real work -- confirm.
  volatile double pi = 1.0;
  for (int i = 3; i < 100000; i += 2) {
    // (i + 1) / 2 is even for i = 3, 7, 11, ... (negative terms) and odd for
    // i = 5, 9, 13, ... (positive terms).
    const double term = (((i + 1) >> 1) % 2) ? 1.0 / i : -1.0 / i;
    // Explicit read-add-write: compound assignment on a volatile object is
    // deprecated since C++20.
    pi = pi + term;
  }
  return pi * 4.0;
}
14

15
// Smoke test: configures the profiler with every event listed in
// torch::profiler::ProfilerPerfEvents, measures a single calc_pi() call and
// checks the resulting counters (non-zero on Linux/Android, zero elsewhere).
TEST(ProfilerTest, LinuxPerf) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));
  torch::profiler::perf_counters_t counters;
  counters.resize(standard_events.size(), 0);

  // Use try..catch HACK to check TORCH_CHECK because we don't yet fail
  // gracefully if the syscall were to fail
  try {
    profiler.Configure(standard_events);

    profiler.Enable();
    // Only the work matters here; the computed value is discarded.
    static_cast<void>(calc_pi());
    profiler.Disable(counters);
  } catch (const c10::Error&) {
    // Bail here if something bad happened during the profiling, we don't want
    // to make the test fail
    return;
  } catch (...) {
    // something else went wrong - this should be reported
    FAIL() << "unexpected non-c10 exception while profiling";
  }

  // Should have counted something if it worked, so let's test that.
  // And if it is not supported the counters should be zeros.
#if defined(__ANDROID__) || defined(__linux__)
  for (auto counter : counters) {
    ASSERT_GT(counter, 0);
  }
#else /* __ANDROID__ || __linux__ */
  for (auto counter : counters) {
    ASSERT_EQ(counter, 0);
  }
#endif /* __ANDROID__ || __linux__ */
}
53

54
// Nested profiling, three levels deep: region A encloses B, which encloses C.
// Each region runs calc_pi() work of its own, so on Linux/Android every
// counter is expected to satisfy A > B > C (and A > B + C).
TEST(ProfilerTest, LinuxPerfNestedDepth) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // Use try..catch HACK to check TORCH_CHECK because we don't yet fail
  // gracefully if the syscall were to fail
  try {
    profiler.Configure(standard_events);

    // * = work kernel calc_pi()
    //
    // A --*---+              +--*-- A
    //         |              |
    //         |              |
    //       B +-*--+    +--*-+ B
    //              |    |
    //              |    |
    //            C +-*--+ C
    //

    // The calc_pi() results are irrelevant; only the work is measured, so
    // each value is discarded explicitly.
    profiler.Enable(); // open A
    static_cast<void>(calc_pi());

    profiler.Enable(); // open B
    static_cast<void>(calc_pi());

    profiler.Enable(); // open C
    static_cast<void>(calc_pi());
    profiler.Disable(counters_C); // close C

    static_cast<void>(calc_pi());
    profiler.Disable(counters_B); // close B

    static_cast<void>(calc_pi());
    profiler.Disable(counters_A); // close A
  } catch (const c10::Error&) {
    // Bail here if something bad happened during the profiling, we don't want
    // to make the test fail
    return;
  } catch (...) {
    // something else went wrong - this should be reported
    FAIL() << "unexpected non-c10 exception while profiling";
  }

// for each counter, assert A > B > C
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GT(counters_A[i], counters_B[i]);
    ASSERT_GT(counters_A[i], counters_C[i]);
    ASSERT_GT(counters_B[i], counters_C[i]);
    ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}
126

127
// Nested profiling with two sibling regions: A encloses B (two calc_pi()
// calls) and then C (one calc_pi() call), with extra work in A around them.
// On Linux/Android every counter is expected to satisfy A > B > C
// (and A > B + C).
TEST(ProfilerTest, LinuxPerfNestedMultiple) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // Use try..catch HACK to check TORCH_CHECK because we don't yet fail
  // gracefully if the syscall were to fail
  try {
    profiler.Configure(standard_events);

    // * = work kernel calc_pi()
    //
    // A --*---+    +---*----+    +--*-- A
    //         |    |        |    |
    //         |    |        |    |
    //      B  +-**-+ B    C +-*--+ C

    // The calc_pi() results are irrelevant; only the work is measured, so
    // each value is discarded explicitly.
    profiler.Enable(); // open A
    static_cast<void>(calc_pi());

    profiler.Enable(); // open B
    static_cast<void>(calc_pi());
    static_cast<void>(calc_pi());
    profiler.Disable(counters_B); // close B

    static_cast<void>(calc_pi());

    profiler.Enable(); // open C
    static_cast<void>(calc_pi());
    profiler.Disable(counters_C); // close C

    static_cast<void>(calc_pi());
    profiler.Disable(counters_A); // close A
  } catch (const c10::Error&) {
    // Bail here if something bad happened during the profiling, we don't want
    // to make the test fail
    return;
  } catch (...) {
    // something else went wrong - this should be reported
    FAIL() << "unexpected non-c10 exception while profiling";
  }

// for each counter, assert A > B > C
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GT(counters_A[i], counters_B[i]);
    ASSERT_GT(counters_A[i], counters_C[i]);
    ASSERT_GT(counters_B[i], counters_C[i]);
    ASSERT_GT(counters_A[i], counters_B[i] + counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}
196

197
// Nested profiling where all three regions wrap the same single calc_pi()
// call: Enable() is called three times back to back, then Disable() three
// times. The outer regions contain no extra work kernel, so on Linux/Android
// the counters are only required to be ordered A >= B >= C.
TEST(ProfilerTest, LinuxPerfNestedSingle) {
  torch::profiler::impl::linux_perf::PerfProfiler profiler;

  // Only monotonically increasing events will work
  std::vector<std::string> standard_events(
      std::begin(torch::profiler::ProfilerPerfEvents),
      std::end(torch::profiler::ProfilerPerfEvents));

  torch::profiler::perf_counters_t counters_A;
  torch::profiler::perf_counters_t counters_B;
  torch::profiler::perf_counters_t counters_C;

  counters_A.resize(standard_events.size(), 0);
  counters_B.resize(standard_events.size(), 0);
  counters_C.resize(standard_events.size(), 0);

  // Use try..catch HACK to check TORCH_CHECK because we don't yet fail
  // gracefully if the syscall were to fail
  try {
    profiler.Configure(standard_events);

    profiler.Enable(); // open A
    profiler.Enable(); // open B
    profiler.Enable(); // open C
    // Only the work matters here; the computed value is discarded.
    static_cast<void>(calc_pi());
    profiler.Disable(counters_C); // close C
    profiler.Disable(counters_B); // close B
    profiler.Disable(counters_A); // close A
  } catch (const c10::Error&) {
    // Bail here if something bad happened during the profiling, we don't want
    // to make the test fail
    return;
  } catch (...) {
    // something else went wrong - this should be reported
    FAIL() << "unexpected non-c10 exception while profiling";
  }

// for each counter, assert A >= B >= C
#if defined(__ANDROID__) || defined(__linux__)
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_GE(counters_A[i], counters_B[i]);
    ASSERT_GE(counters_A[i], counters_C[i]);
    ASSERT_GE(counters_B[i], counters_C[i]);
  }
#else /* __ANDROID__ || __linux__ */
  for (size_t i = 0; i < standard_events.size(); ++i) {
    ASSERT_EQ(counters_A[i], 0);
    ASSERT_EQ(counters_B[i], 0);
    ASSERT_EQ(counters_C[i], 0);
  }
#endif /* __ANDROID__ || __linux__ */
}
249
pytorch

Использование cookies