pytorch

Форк
0
/
import_data.cpp 
284 строки · 9.7 Кб
1
#include <torch/csrc/jit/mobile/import_data.h>
2

3
#include <ATen/Functions.h>
4
#include <ATen/core/ivalue.h>
5
#include <c10/util/irange.h>
6
#include <caffe2/serialize/file_adapter.h>
7
#include <caffe2/serialize/inline_container.h>
8
#include <torch/csrc/jit/api/compilation_unit.h>
9
#include <torch/csrc/jit/mobile/file_format.h>
10
#include <torch/csrc/jit/mobile/flatbuffer_loader.h>
11
#include <torch/csrc/jit/mobile/import.h>
12
#include <torch/csrc/jit/mobile/import_export_common.h>
13
#include <torch/csrc/jit/mobile/module.h>
14
#include <torch/csrc/jit/mobile/observer.h>
15
#include <torch/csrc/jit/mobile/type_parser.h>
16
#include <torch/csrc/jit/runtime/instruction.h>
17
#include <torch/csrc/jit/serialization/unpickler.h>
18
#include <torch/custom_class.h>
19

20
#include <caffe2/serialize/in_memory_adapter.h>
21
#include <exception>
22
#include <fstream>
23
#include <string>
24
#include <vector>
25

26
namespace torch {
27
namespace jit {
28
using caffe2::serialize::FileAdapter;
29
using caffe2::serialize::IStreamAdapter;
30
using caffe2::serialize::MemoryReadAdapter;
31
using caffe2::serialize::PyTorchStreamReader;
32
using caffe2::serialize::ReadAdapterInterface;
33

34
namespace {
35

36
/**
37
 * Given a ZIP file containing a file named "data.pkl", uses Pickle to
38
 * deserialize the file and returns the IValue inside it.
39
 */
40
class IValueUnpickler final {
41
 public:
42
  explicit IValueUnpickler(std::unique_ptr<PyTorchStreamReader> reader);
43
  c10::IValue deserialize(c10::optional<at::Device> device);
44

45
 private:
46
  c10::IValue readArchive(
47
      const std::string& archive_name,
48
      std::shared_ptr<mobile::CompilationUnit> mcu,
49
      c10::optional<at::Device> device);
50

51
  std::shared_ptr<CompilationUnit> compilation_unit_;
52
  std::unique_ptr<PyTorchStreamReader> reader_;
53
};
54

55
/**
 * Stores @p reader and starts with a default-constructed CompilationUnit
 * that will receive the class types created during unpickling.
 */
IValueUnpickler::IValueUnpickler(std::unique_ptr<PyTorchStreamReader> reader)
    : compilation_unit_(std::make_shared<CompilationUnit>()),
      reader_(std::move(reader)) {
}
58

59
/**
 * Unpickles the archive named "data" (i.e. the record "data.pkl").
 *
 * A fresh, empty mobile::CompilationUnit is handed to readArchive() for the
 * "__setstate__" lookup; @p device is forwarded as-is.
 */
c10::IValue IValueUnpickler::deserialize(c10::optional<at::Device> device) {
  return readArchive(
      "data", std::make_shared<mobile::CompilationUnit>(), device);
}
65

66
/**
 * Unpickles the record "<archive_name>.pkl" and returns the parsed IValue.
 *
 * Four callbacks are handed to the Unpickler:
 *  - reader:        serves bytes out of the in-memory pickle record;
 *  - type_resolver: maps a qualified name to a type, creating and registering
 *                   a class type for "__torch__"-prefixed names on first use;
 *  - obj_loader:    rebuilds an object from its pickled state;
 *  - read_record:   fetches "<archive_name>/<name>" from the archive.
 *
 * @param archive_name  Archive to read, without the ".pkl" suffix.
 * @param mcu           Compilation unit searched for "__setstate__" functions.
 * @param device        Forwarded unchanged to the Unpickler.
 */
c10::IValue IValueUnpickler::readArchive(
    const std::string& archive_name,
    std::shared_ptr<mobile::CompilationUnit> mcu,
    c10::optional<at::Device> device) {
  // Pull the whole pickle record into memory. std::tie (rather than a
  // structured binding) is used because the lambdas below capture
  // pickle_size, which C++17 does not allow for structured bindings.
  const std::string pickle_record = archive_name + ".pkl";
  at::DataPtr pickle_ptr;
  size_t pickle_size = 0;
  std::tie(pickle_ptr, pickle_size) = reader_->getRecord(pickle_record);

  // Sequential reader over the in-memory record: copies up to `len` bytes
  // per call and returns 0 once everything has been consumed.
  size_t consumed = 0;
  const char* const bytes = reinterpret_cast<const char*>(pickle_ptr.get());
  auto reader = [&](char* buffer, size_t len) -> size_t {
    if (consumed >= pickle_size) {
      return 0;
    }
    const size_t chunk = std::min(pickle_size - consumed, len);
    std::memcpy(buffer, bytes + consumed, chunk);
    consumed += chunk;
    return chunk;
  };

  static const c10::QualifiedName torchPrefix = "__torch__";
  auto type_resolver = [&](const c10::QualifiedName& qn) {
    TypePtr resolved;
    // HACK: a name starting with `__torch__` is taken to denote a class type.
    // This is a reliable check today, but nothing guarantees it. The real
    // solution is to merge type parsers so class resolution logic is shared.
    if (!torchPrefix.isPrefixOf(qn)) {
      resolved = c10::parseType(qn.qualifiedName());
    } else {
      // Create and register the class the first time its name is seen.
      if (compilation_unit_->get_class(qn) == nullptr) {
        compilation_unit_->register_type(
            ClassType::create(qn, compilation_unit_, true));
      }
      resolved = compilation_unit_->get_class(qn);
    }
    return c10::StrongTypePtr(compilation_unit_, resolved);
  };

  auto obj_loader = [&](const at::StrongTypePtr& type, IValue input) {
    auto cls = type.type_->expect<at::ClassType>();
    auto qn = cls->name();
    c10::QualifiedName setstate_name(qn.value(), "__setstate__");

    // 1) A "__setstate__" function found in `mcu` takes precedence.
    if (auto setstate = mcu->find_function(setstate_name)) {
      auto obj = c10::ivalue::Object::create(type, 0);
      Stack stack({obj, input});
      setstate->run(stack);
      return obj;
    }

    // 2) Otherwise try a registered custom class that has "__setstate__".
    auto custom_class_type = torch::jit::getCustomClass(qn->qualifiedName());
    if (custom_class_type && custom_class_type->findMethod("__setstate__")) {
      auto obj = c10::ivalue::Object::create(
          c10::StrongTypePtr(nullptr, custom_class_type), 1);
      Stack stack({obj, input});
      custom_class_type->getMethod("__setstate__").run(stack);
      return obj;
    }

    // 3) Fallback: the state is a dict; each entry becomes one slot, in
    //    iteration order.
    auto dict = std::move(input).toGenericDict();
    auto obj = c10::ivalue::Object::create(type, dict.size());
    size_t slot = 0;
    for (const auto& entry : dict) {
      std::stringstream name;
      name << entry.key();
      // NOTE(review): the attribute is registered with the *key's* type, not
      // the value's; kept as-is -- confirm whether this is intentional.
      cls->addOrCheckAttribute(name.str(), entry.key().type());
      obj->setSlot(slot, entry.value());
      ++slot;
    }
    return obj;
  };

  auto read_record = [&](const std::string& name) {
    return std::get<0>(reader_->getRecord(archive_name + "/" + name));
  };

  Unpickler unpickler(
      reader,
      std::move(type_resolver),
      std::move(obj_loader),
      std::move(read_record),
      // NOLINTNEXTLINE(performance-move-const-arg)
      std::move(device),
      false,
      nullptr);
  return unpickler.parse_ivalue();
}
166

167
/**
168
 * Extracts and returns the parameter map serialized as ZIP + Pickle in @p rai.
169
 */
170
std::map<std::string, at::Tensor> load_parameters_from_zip(
171
    std::unique_ptr<ReadAdapterInterface> rai,
172
    c10::optional<c10::Device> device) {
173
  auto reader = std::make_unique<PyTorchStreamReader>(std::move(rai));
174
  IValueUnpickler unpickler(std::move(reader));
175
  auto result = unpickler.deserialize(device).toGenericDict();
176
  std::map<std::string, at::Tensor> map;
177
  for (const auto& e : result) {
178
    auto key = e.key().toStringRef();
179
    auto value = e.value().toTensor().tensor_data();
180
    map[key] = value;
181
  }
182
  return map;
183
}
184

185
} // namespace
186

187
/**
188
 * Extracts the parameter map stored in @p module. Expects a layout
189
 * compatible with the one created by #_save_parameters().
190
 */
191
std::map<std::string, at::Tensor> mobile_module_to_parameter_map(
192
    const mobile::Module& module) {
193
  // Safely look for a slot with the expected name. Note that
194
  // c10::ivalue::Object::getAttr() is not safe if the attribute isn't present.
195
  auto obj = module._ivalue();
196
  const std::vector<IValue>& slots = obj->slots();
197
  for (const auto i : c10::irange(slots.size())) {
198
    if (obj->type()->getAttributeName(i) ==
199
        mobile::internal::kSavedParametersAttributeName) {
200
      // Found a slot with the right name; make sure it's a
201
      // Dict<string, Tensor>.
202
      c10::IValue data = slots[i];
203
      if (data.isGenericDict()) {
204
        auto data_dict = data.toGenericDict();
205

206
        // The key and value should be DynamicTypes that wrap String and Tensor.
207
        c10::DynamicType* keyType =
208
            data_dict.keyType()->castRaw<c10::DynamicType>();
209
        c10::DynamicType* valueType =
210
            data_dict.valueType()->castRaw<c10::DynamicType>();
211
        if (keyType != nullptr &&
212
            keyType->fallback()->kind() == TypeKind::StringType &&
213
            valueType != nullptr &&
214
            valueType->fallback()->kind() == TypeKind::TensorType) {
215
          // Name and type are good; copy the contents to the output map.
216
          std::map<std::string, at::Tensor> params;
217
          for (const auto& e : data_dict) {
218
            // The source Tensor points into the flatbuffer data associated with
219
            // the Module. But, this Tensor needs to outlive the Module, since
220
            // the caller of _load_parameters() won't have a pointer to the
221
            // Module. So, return a deep copy.
222
            const auto& source = e.value().toTensor();
223
            at::Tensor copy = at::empty_like(source); // Must be the same shape.
224
            copy.copy_(source);
225

226
            params[e.key().toStringRef()] = copy;
227
          }
228
          return params;
229
        }
230
      }
231
    }
232
  }
233

234
  TORCH_CHECK(
235
      false,
236
      "Could not find Dict<string, Tensor> named '",
237
      mobile::internal::kSavedParametersAttributeName,
238
      "' in deserialized mobile::Module");
239
}
240

241
static std::map<std::string, at::Tensor> _load_parameters_bytes(
242
    std::shared_ptr<char> data,
243
    size_t size,
244
    c10::optional<at::Device> device) {
245
  TORCH_CHECK(size >= kFileFormatHeaderSize, "Unrecognized data format");
246
  FileFormat format = getFileFormat(data.get());
247
  // Call the appropriate parser.
248
  std::map<std::string, at::Tensor> map;
249
  switch (format) {
250
    case FileFormat::FlatbufferFileFormat: {
251
      auto m = parse_flatbuffer_no_object(data, size, device);
252
      map = mobile_module_to_parameter_map(m);
253
      break;
254
    }
255

256
    case FileFormat::ZipFileFormat: {
257
      auto rai = std::make_unique<caffe2::serialize::MemoryReadAdapter>(
258
          data.get(), size);
259
      map = load_parameters_from_zip(std::move(rai), device);
260
      break;
261
    }
262

263
    default:
264
      TORCH_CHECK(false, "Unrecognized data format");
265
  }
266
  return map;
267
}
268

269
std::map<std::string, at::Tensor> _load_parameters(
270
    std::istream& in,
271
    c10::optional<at::Device> device) {
272
  auto [data, size] = get_stream_content(in);
273
  return _load_parameters_bytes(std::move(data), size, device);
274
}
275

276
std::map<std::string, at::Tensor> _load_parameters(
277
    const std::string& filename,
278
    c10::optional<at::Device> device) {
279
  auto [data, size] = get_file_content(filename.c_str());
280
  return _load_parameters_bytes(std::move(data), size, device);
281
}
282

283
} // namespace jit
284
} // namespace torch
285

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.