pytorch

pybind_state_gpu.cc
177 строк · 6.0 Кб
Перенос по словам
1
// Note(jiayq): import_array() is called inside caffe2_python.cc, which is why
// NO_IMPORT_ARRAY is defined below. Read
// http://docs.scipy.org/doc/numpy-1.10.1/reference/c-api.array.html#miscellaneous
// for more details.
5

6
#define NO_IMPORT_ARRAY
7

8
#include "pybind_state.h"
9

10
#include <pybind11/pybind11.h>
11
#include <pybind11/stl.h>
12

13
#ifdef CAFFE2_USE_CUDNN
14
#include "caffe2/core/common_cudnn.h"
15
#endif // CAFFE2_USE_CUDNN
16
#include <c10/cuda/CUDAGuard.h>
17
#include "caffe2/core/context_gpu.h"
18
#include "caffe2/operators/operator_fallback_gpu.h"
19
#include "caffe2/python/pybind_state_registry.h"
20

21
#ifdef CAFFE2_USE_TRT
22
#include "caffe2/contrib/tensorrt/tensorrt_tranformer.h"
23
#endif // CAFFE2_USE_TRT
24

25
namespace caffe2 {
26
namespace python {
27

28
REGISTER_CUDA_OPERATOR(Python, GPUFallbackOp);
29
REGISTER_CUDA_OPERATOR(PythonGradient, GPUFallbackOp);
30

31
REGISTER_CUDA_OPERATOR(PythonDLPack, GPUFallbackOp);
32
REGISTER_CUDA_OPERATOR(PythonDLPackGradient, GPUFallbackOp);
33

34
REGISTER_BLOB_FEEDER(CUDA, TensorFeeder<CUDAContext>);
35

36
namespace py = pybind11;
37

38
// Registers the CUDA-specific module-level functions and attributes on `m`.
//
// Functions / attributes exposed to Python:
//   num_cuda_devices, get_cuda_version  - bound directly to NumCudaDevices
//                                         and CudaVersion.
//   get_cudnn_version and the three cudnn_convolution_*_algo_count
//                                       - real values when built with
//                                         CAFFE2_USE_CUDNN, otherwise 0.
//   get_gpu_memory_info(device_id)      - (free, total) as reported by
//                                         cudaMemGetInfo.
//   get_cuda_peer_access_pattern()      - matrix of bools filled in by
//                                         GetCudaPeerAccessPattern.
//   get_device_properties(deviceid)     - dict with "name", "major", "minor"
//                                         and "totalGlobalMem".
//   onnx_to_trt_op(...)                 - serialized op built by
//                                         TensorRTTransformer::BuildTrtOp.
//   transform_trt(...)                  - serialized NetDef after
//                                         TensorRTTransformer::Transform.
//
// The two TensorRT entry points are always registered, but throw unless the
// build defines CAFFE2_USE_TRT.
void addCUDAGlobalMethods(py::module& m) {
  m.def("num_cuda_devices", &NumCudaDevices);
  m.def("get_cuda_version", &CudaVersion);
#ifdef CAFFE2_USE_CUDNN
  m.def("get_cudnn_version", &cudnnCompiledVersion);
  m.attr("cudnn_convolution_fwd_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_FWD_ALGO_COUNT));
  m.attr("cudnn_convolution_bwd_data_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT));
  m.attr("cudnn_convolution_bwd_filter_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT));
#else
  // Without cuDNN the same names still exist so Python callers need no
  // feature detection; every value is simply 0.
  m.def("get_cudnn_version", []() { return static_cast<size_t>(0); });
  m.attr("cudnn_convolution_fwd_algo_count") = py::int_(0);
  m.attr("cudnn_convolution_bwd_data_algo_count") = py::int_(0);
  m.attr("cudnn_convolution_bwd_filter_algo_count") = py::int_(0);
#endif
  m.def("get_gpu_memory_info", [](int device_id) {
    // NOTE(review): the guard is presumably what makes `device_id` the device
    // queried by cudaMemGetInfo below - confirm against CUDAGuard's contract.
    CUDAGuard guard(device_id);
    size_t device_free = 0;
    size_t device_total = 0;
    CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
    return std::pair<size_t, size_t>{device_free, device_total};
  });
  m.def("get_cuda_peer_access_pattern", []() {
    std::vector<std::vector<bool>> pattern;
    CAFFE_ENFORCE(caffe2::GetCudaPeerAccessPattern(&pattern));
    return pattern;
  });
  m.def("get_device_properties", [](int deviceid) {
    const auto& prop = GetDeviceProperty(deviceid);
    std::map<std::string, py::object> obj;
    obj["name"] = py::cast(prop.name);
    obj["major"] = py::cast(prop.major);
    obj["minor"] = py::cast(prop.minor);
    obj["totalGlobalMem"] = py::cast(prop.totalGlobalMem);
    return obj;
  });
  m.def(
      "onnx_to_trt_op",
      [](const py::bytes& onnx_model_str,
         const std::unordered_map<std::string, std::vector<int>>&
             output_size_hints,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        TensorRTTransformer t(
            max_batch_size, max_workspace_size, verbosity, debug_builder);
        auto op_def =
            t.BuildTrtOp(onnx_model_str.cast<std::string>(), output_size_hints);
        std::string out;
        op_def.SerializeToString(&out);
        return py::bytes(out);
#else
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
  m.def(
      "transform_trt",
      [](const py::bytes& pred_net_str,
         const std::unordered_map<std::string, std::vector<int>>& shapes,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder,
         bool build_serializable_op) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        caffe2::NetDef pred_net;
        // Fail loudly on an unparsable net. Previously this was only logged
        // and the transform went on to run on (and return) a broken NetDef.
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(
                pred_net_str.cast<std::string>(), &pred_net),
            "broken pred_net protobuf");
        // Every shape hint is turned into a FLOAT TensorShape.
        std::unordered_map<std::string, TensorShape> tensor_shapes;
        for (const auto& it : shapes) {
          tensor_shapes.emplace(
              it.first, CreateTensorShape(it.second, TensorProto::FLOAT));
        }
        TensorRTTransformer ts(
            max_batch_size,
            max_workspace_size,
            verbosity,
            debug_builder,
            build_serializable_op);
        ts.Transform(GetCurrentWorkspace(), &pred_net, tensor_shapes);
        std::string pred_net_str2;
        pred_net.SerializeToString(&pred_net_str2);
        return py::bytes(pred_net_str2);
#else
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
}
131

132
// Binds DLPackWrapper<CUDAContext> into `m` as the Python class
// "DLPackTensorCUDA".
//
// Python-visible members:
//   data      (read-only property) - result of DLPackWrapper::data().
//   feed(obj)                      - forwards `obj` to DLPackWrapper::feed().
//   _shape    (read-only property) - sizes() of the wrapped tensor.
//   _reshape(dims)                 - Resize() of the wrapped tensor.
//
// `data` and `feed` first enforce that the wrapper's device option is
// PROTO_CUDA.
void addCUDAObjectMethods(py::module& m) {
  using CudaDLPack = DLPackWrapper<CUDAContext>;

  // Precondition shared by `data` and `feed`.
  const auto enforce_cuda_device = [](CudaDLPack* t) {
    CAFFE_ENFORCE_EQ(
        t->device_option.device_type(),
        PROTO_CUDA,
        "Expected CUDA device option for CUDA tensor");
  };

  py::class_<CudaDLPack> dlpack_class(m, "DLPackTensorCUDA");

  dlpack_class.def_property_readonly(
      "data",
      [enforce_cuda_device](CudaDLPack* t) -> py::object {
        enforce_cuda_device(t);
        return t->data();
      },
      "Return DLPack tensor with tensor's data.");

  dlpack_class.def(
      "feed",
      [enforce_cuda_device](CudaDLPack* t, py::object obj) {
        enforce_cuda_device(t);
        t->feed(obj);
      },
      "Copy data from given DLPack tensor into this tensor.");

  dlpack_class.def_property_readonly(
      "_shape", [](const CudaDLPack& self) { return self.tensor->sizes(); });

  dlpack_class.def(
      "_reshape", [](CudaDLPack* self, std::vector<int64_t> dims) {
        self->tensor->Resize(dims);
      });
}
164

165
// Entry point of the `caffe2_pybind11_state_gpu` extension module.
//
// Installs the shared and the CUDA-specific bindings, then runs every entry
// found in PybindAdditionRegistry against the module.
PYBIND11_MODULE(caffe2_pybind11_state_gpu, m) {
  m.doc() = "pybind11 stateful interface to Caffe2 workspaces - GPU edition";

  // Global functions (shared, then CUDA) followed by object bindings
  // (shared, then CUDA).
  addGlobalMethods(m);
  addCUDAGlobalMethods(m);
  addObjectMethods(m);
  addCUDAObjectMethods(m);

  // Take the key list once, then create each registered addition with the
  // module as its argument.
  const auto addition_keys = PybindAdditionRegistry()->Keys();
  for (const auto& key : addition_keys) {
    PybindAdditionRegistry()->Create(key, m);
  }
}
176
} // namespace python
177
} // namespace caffe2
178
pytorch

Использование cookies