pytorch

Форк
0
/
cuda_profile_ops.cc 
108 строк · 3.1 Кб
1
#include "caffe2/core/common_gpu.h"
#include "caffe2/core/operator.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <array>
#include <fstream>
#include <string>
#include <vector>

#include <cuda_profiler_api.h>
8

9
namespace caffe2 {
10

11
// Profiler options, one per line, written verbatim to the temporary
// configuration file that CudaProfileInitializeOp passes to
// cudaProfilerInitialize. The table is only ever read, so it is const.
// NOTE(review): the option names follow the legacy CUDA profiler config-file
// format; "enableonstart 0" presumably keeps collection off until
// cudaProfilerStart() is called -- confirm against the CUDA profiler docs.
static const std::vector<std::string> kCudaProfileConfiguration = {
    "gpustarttimestamp",
    "gpuendtimestamp",
    "gridsize3d",
    "threadblocksize",
    "dynsmemperblock",
    "stasmemperblock",
    "regperthread",
    "memtransfersize",
    "memtransferdir",
    "memtransferhostmemtype",
    "streamid",
    "cacheconfigrequested",
    "cacheconfigexecuted",
    "countermodeaggregate",
    "enableonstart 0",
    "active_warps",
    "active_cycles",
};
30

31
class CudaProfileInitializeOp : public OperatorBase {
32
 public:
33
  CudaProfileInitializeOp(const OperatorDef& operator_def, Workspace* ws)
34
      : OperatorBase(operator_def, ws),
35
        output_(GetSingleArgument<std::string>("output", "/tmp/output")) {
36
    std::array<char, 128> buf;
37
    std::string tmpl = "/tmp/cuda_profile_config.XXXXXX";
38
    CAFFE_ENFORCE_LT(tmpl.size(), buf.size());
39
    memcpy(buf.data(), tmpl.data(), tmpl.size());
40
    auto result = mktemp(buf.data());
41
    CAFFE_ENFORCE_NE(strlen(result), 0, "mktemp: ", strerror(errno));
42
    config_ = result;
43

44
    // Write configuration to temporary file
45
    {
46
      std::ofstream ofs(config_, std::ios::out | std::ios::trunc);
47
      CAFFE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate());
48
      for (const auto& line : kCudaProfileConfiguration) {
49
        ofs << line << std::endl;
50
      }
51
    }
52
  }
53

54
  ~CudaProfileInitializeOp() override {
55
    unlink(config_.c_str());
56
  }
57

58
  bool Run(int /* unused */ /*stream_id*/ = 0) override {
59
    // If this fails, check the contents of "output" for hints.
60
#if defined(CUDA_VERSION) && CUDA_VERSION < 12000
61
    // cudaProfilerInitialize is no longer needed after CUDA 12:
62
    // https://forums.developer.nvidia.com/t/cudaprofilerinitialize-is-deprecated-alternative/200776/3
63
    CUDA_CHECK(
64
        cudaProfilerInitialize(config_.c_str(), output_.c_str(), cudaCSV));
65
#endif
66
    return true;
67
  }
68

69
 protected:
70
  std::string config_;
71
  std::string output_;
72
};
73

74
class CudaProfileStartOp : public OperatorBase {
75
 public:
76
  CudaProfileStartOp(const OperatorDef& operator_def, Workspace* ws)
77
      : OperatorBase(operator_def, ws) {}
78

79
  bool Run(int /* unused */ /*stream_id*/ = 0) override {
80
    CUDA_ENFORCE(cudaProfilerStart());
81
    return true;
82
  }
83
};
84

85
class CudaProfileStopOp : public OperatorBase {
86
 public:
87
  CudaProfileStopOp(const OperatorDef& operator_def, Workspace* ws)
88
      : OperatorBase(operator_def, ws) {}
89

90
  bool Run(int /* unused */ /*stream_id*/ = 0) override {
91
    CUDA_ENFORCE(cudaProfilerStop());
92
    return true;
93
  }
94
};
95

96
// Schemas for the three profiler operators; no further constraints are
// attached to them here.
OPERATOR_SCHEMA(CudaProfileInitialize);
OPERATOR_SCHEMA(CudaProfileStart);
OPERATOR_SCHEMA(CudaProfileStop);

// Each operator is registered under both the CPU and the CUDA registries with
// the same implementation class.
// NOTE(review): presumably so the ops resolve whichever device option the
// enclosing net uses -- confirm with callers.
REGISTER_CPU_OPERATOR(CudaProfileInitialize, CudaProfileInitializeOp);
REGISTER_CPU_OPERATOR(CudaProfileStart, CudaProfileStartOp);
REGISTER_CPU_OPERATOR(CudaProfileStop, CudaProfileStopOp);

REGISTER_CUDA_OPERATOR(CudaProfileInitialize, CudaProfileInitializeOp);
REGISTER_CUDA_OPERATOR(CudaProfileStart, CudaProfileStartOp);
REGISTER_CUDA_OPERATOR(CudaProfileStop, CudaProfileStopOp);
107

108
} // namespace caffe2
109

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.