pytorch

convert_image_to_tensor.cc
450 строк · 13.3 Кб
Перенос по словам
1
/**
2
 * Copyright (c) 2016-present, Facebook, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
#include <opencv2/opencv.hpp>
18
#include <cmath>
19
#include <fstream>
20

21
#include "caffe2/core/common.h"
22
#include "caffe2/core/db.h"
23
#include "caffe2/core/init.h"
24
#include "caffe2/core/logging.h"
25
#include "caffe2/core/timer.h"
26
#include "caffe2/proto/caffe2_pb.h"
27
#include "caffe2/utils/proto_utils.h"
28
#include "caffe2/utils/string_utils.h"
29

30

31
// Command-line flags. They select the inputs (images and/or a text file),
// the preprocessing applied to images, and the output tensor files.
C10_DEFINE_int(
    batch_size,
    -1,
    "Specify the batch size of the input. The number of items in the "
    "input needs to be multiples of the batch size. If the batch size "
    "is less than 0, all inputs are in one batch.");
C10_DEFINE_bool(color, true, "If set, load images in color.");
C10_DEFINE_string(
    crop,
    "-1,-1",
    "The center cropped height and width. If the value is less than zero, "
    "it is not cropped.");
C10_DEFINE_string(input_images, "", "Comma separated images");
C10_DEFINE_string(input_image_file, "", "The file containing input images");
C10_DEFINE_string(input_text_file, "", "the text file to be written to blobs");
C10_DEFINE_string(
    output_tensor,
    "",
    "The output tensor file in NCHW for input images");
C10_DEFINE_string(
    output_text_tensor,
    "",
    "The output tensor file for the text input specified in input_text_file");
C10_DEFINE_string(
    preprocess,
    "",
    "Options to specify the preprocess routines. The available options are "
    "subtract128, normalize, mean, std, bgrtorgb. If multiple steps are provided, they "
    "are separated by comma (,) in sequence.");
C10_DEFINE_string(
    report_time,
    "",
    "Report the conversion stage time to screen. "
    "The format of the string is <type>|<identifier>. "
    "The valid type is 'json'. "
    "The valid identifier is nothing or an identifier that prefix every line");
C10_DEFINE_string(
    scale,
    "-1,-1",
    "Scale the images to be within the min,max box. The shorter edge is "
    "min pixels. But if the other edge is more than the max pixels, the "
    "other edge is scaled to max pixels (and the shorter edge can be less "
    "than the min pixels)");
C10_DEFINE_bool(text_output, false, "Write the output in text format.");
C10_DEFINE_bool(warp, false, "If warp is set, warp the images to square.");
76

77
namespace caffe2 {
78

79
void reportTime(
80
    std::string type,
81
    double ts,
82
    std::string metric,
83
    std::string unit) {
84
  if (FLAGS_report_time == "") {
85
    return;
86
  }
87
  vector<string> s = caffe2::split('|', FLAGS_report_time);
88
  assert(s[0] == "json");
89
  std::string identifier = "";
90
  if (s.size() > 1) {
91
    identifier = s[1];
92
  }
93
  std::cout << identifier << "{\"type\": \"" << type << "\", \"value\": " << ts
94
            << ", \"metric\": \"" << metric << "\", \"unit\": \"" << unit
95
            << "\"}" << std::endl;
96
}
97

98
void splitSizes(const std::string& arg, int* ptr0, int* ptr1) {
99
  vector<string> sizes = caffe2::split(',', arg);
100
  if (sizes.size() == 2) {
101
    *ptr0 = std::stoi(sizes[0]);
102
    *ptr1 = std::stoi(sizes[1]);
103
  } else if (sizes.size() == 1) {
104
    *ptr0 = std::stoi(sizes[0]);
105
    *ptr1 = std::stoi(sizes[0]);
106
  } else {
107
    assert(false);
108
  }
109
}
110

111

112
// Rescale `img` uniformly according to --scale ("min,max").
//
// The shorter image edge is scaled to `min` pixels, unless that would push
// the longer edge past `max`, in which case the longer edge is scaled to
// `max` instead. A negative `max` means "no upper bound". When both values
// are non-positive the input image is returned as is.
cv::Mat resizeImage(cv::Mat& img) {
  int shorter_target, longer_limit;
  splitSizes(FLAGS_scale, &shorter_target, &longer_limit);

  const bool scaling_requested = (shorter_target > 0) || (longer_limit > 0);
  if (!scaling_requested) {
    return img;
  }
  if (longer_limit < 0) {
    longer_limit = INT_MAX;
  }
  assert(shorter_target <= longer_limit);

  const bool taller_than_wide = img.rows > img.cols;
  const int short_edge = taller_than_wide ? img.cols : img.rows;
  const int long_edge = taller_than_wide ? img.rows : img.cols;

  // Start from the factor that maps the short edge onto the target, then
  // fall back to the factor that fits the long edge into the limit.
  double factor = 1.0 * shorter_target / short_edge;
  if (factor * long_edge > longer_limit) {
    factor = 1.0 * longer_limit / long_edge;
  }

  const int out_cols = static_cast<int>(std::round(img.cols * factor));
  const int out_rows = static_cast<int>(std::round(img.rows * factor));
  assert((out_cols <= longer_limit) && (out_rows <= longer_limit));
  if ((out_cols < shorter_target) || (out_rows < shorter_target)) {
    // The limit on the long edge won, so one edge must sit exactly on it.
    assert((out_cols == longer_limit) || (out_rows == longer_limit));
  } else {
    assert((out_cols == shorter_target) || (out_rows == shorter_target));
  }

  cv::Mat scaled;
  cv::resize(img, scaled, cv::Size(), factor, factor, cv::INTER_LINEAR);
  return scaled;
}
148

149
// Center-crop `img` to the requested height and width.
//
// *height_ptr / *width_ptr carry the requested size on entry. If either is
// non-positive, or the image already has exactly that size, `img` is
// returned unchanged and the pointers are not written. Otherwise the request
// is clamped to the image size, the clamped size is written back through the
// pointers, and a cloned copy of the cropped region is returned.
cv::Mat cropToRec(cv::Mat& img, int* height_ptr, int* width_ptr) {
  const int want_h = *height_ptr;
  const int want_w = *width_ptr;

  const bool has_target = (want_h > 0) && (want_w > 0);
  const bool already_fits = (img.rows == want_h) && (img.cols == want_w);
  if (!has_target || already_fits) {
    return img;
  }

  // Offsets are derived from the unclamped request and floored at zero.
  int left = (img.cols - want_w) / 2;
  int top = (img.rows - want_h) / 2;
  if (left < 0) {
    left = 0;
  }
  if (top < 0) {
    top = 0;
  }
  const int out_w = want_w > img.cols ? img.cols : want_w;
  const int out_h = want_h > img.rows ? img.rows : want_h;

  const cv::Rect roi(left, top, out_w, out_h);
  assert(
      0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= img.cols &&
      0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= img.rows);

  // clone() copies the region so the result occupies continuous memory.
  cv::Mat cropped = img(roi).clone();
  *height_ptr = out_h;
  *width_ptr = out_w;
  return cropped;
}
177

178
// Flatten `img` into a planar float vector, applying the --preprocess steps.
//
// Every value is transformed as (pixel / normalize - mean) / std, with
// per-plane constants selected by the comma separated steps in
// --preprocess (later steps override earlier ones):
//   subtract128 - mean 128, std 1, normalize 1
//   normalize   - normalize 255
//   mean        - mean {0.406, 0.456, 0.485}
//   std         - std {0.225, 0.224, 0.229}
//   bgrtorgb    - swap the first and third channel when writing planes
// An unknown step fails via CAFFE_ENFORCE.
//
// With --color the result holds three consecutive planes of rows*cols
// values each; otherwise a single plane. The pixel iterators read the Mat
// as float / Vec3f, so the caller is expected to pass a CV_32F image.
std::vector<float> convertToVector(cv::Mat& img) {
  std::vector<float> normalize(3, 1);
  std::vector<float> mean(3, 0);
  std::vector<float> stddev(3, 1);
  bool bgrtorgb = false;

  const vector<string> steps = caffe2::split(',', FLAGS_preprocess);
  for (const auto& step : steps) {
    if (step == "subtract128") {
      mean = {128, 128, 128};
      stddev = {1, 1, 1};
      normalize = {1, 1, 1};
    } else if (step == "normalize") {
      normalize = {255, 255, 255};
    } else if (step == "mean") {
      mean = {0.406f, 0.456f, 0.485f};
    } else if (step == "std") {
      stddev = {0.225f, 0.224f, 0.229f};
    } else if (step == "bgrtorgb") {
      bgrtorgb = true;
    } else {
      CAFFE_ENFORCE(
          false,
          "Unsupported preprocess step. The supported steps are: subtract128, "
          "normalize, mean, std, bgrtorgb.");
    }
  }

  const int size = img.cols * img.rows;
  const int C = FLAGS_color ? 3 : 1;
  std::vector<float> values(C * size);
  if (C == 1) {
    int idx = 0;
    for (auto it = img.begin<float>(), end = img.end<float>(); it != end;
         ++it) {
      values[idx++] = (*it / normalize[0] - mean[0]) / stddev[0];
    }
  } else {
    // Source channel written to the first / second / third output plane.
    const int first = bgrtorgb ? 2 : 0;
    const int second = 1;
    const int third = bgrtorgb ? 0 : 2;
    // NOTE(review): the constants are indexed by output plane, not by source
    // channel, so with bgrtorgb the first plane still uses mean[0]/std[0].
    // Confirm this ordering is intended.
    int i = 0;
    for (auto it = img.begin<cv::Vec3f>(), end = img.end<cv::Vec3f>();
         it != end;
         ++it, ++i) {
      values[i] = ((*it)[first] / normalize[0] - mean[0]) / stddev[0];
      values[size + i] = ((*it)[second] / normalize[1] - mean[1]) / stddev[1];
      values[2 * size + i] =
          ((*it)[third] / normalize[2] - mean[2]) / stddev[2];
    }
  }
  return values;
}
233

234
std::vector<float> convertOneImage(
235
    std::string& filename,
236
    int* height_ptr,
237
    int* width_ptr) {
238
  assert(filename[0] != '~');
239

240
  std::cout << "Converting " << filename << std::endl;
241

242
  // Load image
243
  cv::Mat img_uint8 = cv::imread(
244
#if CV_MAJOR_VERSION <= 3
245
      filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
246
#else
247
      filename, FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
248
#endif
249
  caffe2::Timer timer;
250
  timer.Start();
251
  cv::Mat img;
252
  // Convert image to floating point values
253
  img_uint8.convertTo(img, CV_32F);
254
  // Resize image
255
  cv::Mat resized_img = resizeImage(img);
256

257
  int height, width;
258
  splitSizes(FLAGS_crop, &height, &width);
259
  if ((height <= 0) || (width <= 0)) {
260
    height = resized_img.rows;
261
    width = resized_img.cols;
262
  }
263
  cv::Mat crop = cropToRec(resized_img, &height, &width);
264

265
  // Assert we don't have to deal with alignment
266
  DCHECK(crop.isContinuous());
267
  assert(crop.rows == height);
268
  assert(crop.cols == width);
269
  std::vector<float> one_image_values = convertToVector(crop);
270
  *height_ptr = height;
271
  *width_ptr = width;
272
  double ts = timer.MicroSeconds();
273
  reportTime("image_preprocess", ts, "convert", "us");
274
  return one_image_values;
275
}
276

277
int getBatchSize(int num_items) {
278
  int batch_size = FLAGS_batch_size;
279
  if (batch_size < 0) {
280
    batch_size = num_items;
281
  } else {
282
    assert(num_items % batch_size == 0);
283
  }
284
  return batch_size;
285
}
286

287
void writeValues(
288
    std::vector<std::vector<std::vector<float>>>& values,
289
    std::vector<std::vector<int>>& dims,
290
    std::string output_file) {
291

292
  caffe2::Timer timer;
293
  timer.Start();
294

295
  assert(dims.size() == values.size());
296
  int num_batches = dims.size();
297

298
  TensorProtos protos;
299
  for (int k = 0; k < num_batches; k++) {
300
    TensorProto* data;
301
    data = protos.add_protos();
302
    data->set_data_type(TensorProto::FLOAT);
303
    auto one_dim = dims[k];
304
    for (int dim : one_dim) {
305
      data->add_dims(dim);
306
    }
307
    int batch_size = one_dim[0];
308
    long long int entry_size = 1;
309
    for (int i = 1; i < one_dim.size(); i++) {
310
      entry_size *= one_dim[i];
311
    }
312

313
    // Not optimized
314
    for (int i = 0; i < batch_size; i++) {
315
      assert(values[k][i].size() == entry_size);
316
      for (int j = 0; j < values[k][i].size(); j++) {
317
        data->add_float_data(values[k][i][j]);
318
      }
319
    }
320
  }
321
  double ts = timer.MicroSeconds();
322
  reportTime("preprocess", ts, "data_pack", "us");
323

324
  if (FLAGS_text_output) {
325
    caffe2::WriteProtoToTextFile(protos, output_file);
326
  } else {
327
    caffe2::WriteProtoToBinaryFile(protos, output_file);
328
  }
329
}
330

331
void convertImages() {
332
  vector<string> file_names;
333
  if (FLAGS_input_images != "") {
334
    file_names = caffe2::split(',', FLAGS_input_images);
335
  } else if (FLAGS_input_image_file != "") {
336
    std::ifstream infile(FLAGS_input_image_file);
337
    std::string line;
338
    while (std::getline(infile, line)) {
339
      vector<string> file_name = caffe2::split(',', line);
340
      string name;
341
      if (file_name.size() == 3) {
342
        name = file_name[2];
343
      } else {
344
        name = line;
345
      }
346
      file_names.push_back(name);
347
    }
348
  } else {
349
    return;
350
  }
351
  int batch_size = getBatchSize(file_names.size());
352
  int num_batches = file_names.size() / batch_size;
353
  assert(file_names.size() == batch_size * num_batches);
354
  std::vector<std::vector<std::vector<float>>> values;
355
  std::vector<std::vector<int>> dims;
356
  int C = FLAGS_color ? 3 : 1;
357
  for (int k = 0; k < num_batches; k++) {
358
    std::vector<std::vector<float>> one_value;
359
    int height = -1;
360
    int width = -1;
361
    for (int i = 0; i < batch_size; i++) {
362
      int idx = k * batch_size + i;
363
      int one_height, one_width;
364
      std::vector<float> one_image_values =
365
          convertOneImage(file_names[idx], &one_height, &one_width);
366
      if (height < 0 && width < 0) {
367
        height = one_height;
368
        width = one_width;
369
      } else {
370
        assert(height == one_height);
371
        assert(width == one_width);
372
      }
373
      one_value.push_back(one_image_values);
374
    }
375
    vector<int> one_dim = {batch_size, C, height, width};
376
    dims.push_back(one_dim);
377
    values.push_back(one_value);
378
  }
379
  writeValues(values, dims, FLAGS_output_tensor);
380
}
381

382
template <class TYPE>
383
vector<TYPE> splitString(std::string& line) {
384
  vector<string> vector_str = caffe2::split(',', line);
385
  vector<TYPE> vector_int;
386
  for (string str : vector_str) {
387
    vector_int.push_back((TYPE)std::stod(str));
388
  }
389
  return vector_int;
390
}
391

392
/* Convert the values in a json file to blobs
393
   The format of the json file should be:
394
   <number of items>,  <dim2>.... (dimensions of items)
395
   <entry>, <entry>, <entry>... (all entries in one item)
396
   <entry>, <entry>, <entry>...
397
   ....
398
*/
399
void convertValues() {
400
  if (FLAGS_input_text_file == "") {
401
    return;
402
  }
403
  std::ifstream infile(FLAGS_input_text_file);
404
  std::string line;
405
  std::getline(infile, line);
406
  vector<int> file_dims = splitString <int>(line);
407
  assert(file_dims.size() >= 2);
408

409
  int num_items = file_dims[0];
410
  int batch_size = getBatchSize(num_items);
411
  int num_batches = num_items / batch_size;
412
  assert(num_items == batch_size * num_batches);
413
  vector<string> lines;
414
  while (std::getline(infile, line)) {
415
    lines.push_back(line);
416
  }
417
  assert(lines.size() == num_items);
418
  std::vector<std::vector<std::vector<float>>> values;
419
  std::vector<std::vector<int>> dims;
420
  for (int i = 0; i < num_batches; i++) {
421
    std::vector<std::vector<float>> one_value;
422
    int num = -1;
423
    for (int j = 0; j < batch_size; j++) {
424
      int idx = i * batch_size + j;
425
      std::string line = lines[idx];
426
      vector<float> item = splitString<float>(line);
427
      if (num < 0) {
428
        num = item.size();
429
      } else {
430
        assert(num == item.size());
431
      }
432
      one_value.push_back(item);
433
    }
434
    vector<int> batch_dims = file_dims;
435
    batch_dims[0] = batch_size;
436
    dims.push_back(batch_dims);
437
    values.push_back(one_value);
438
  }
439

440
  writeValues(values, dims, FLAGS_output_text_tensor);
441
}
442

443
} // namespace caffe2
444

445
int main(int argc, char** argv) {
446
  caffe2::GlobalInit(&argc, &argv);
447
  caffe2::convertImages();
448
  caffe2::convertValues();
449
  return 0;
450
}
451
pytorch

Использование cookies