1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
17
#if defined(USE_NCNN_SIMPLEOCV)
20
#include <opencv2/core/core.hpp>
21
#include <opencv2/highgui/highgui.hpp>
22
#include <opencv2/imgproc/imgproc.hpp>
29
// Bounding box of one detection; read as a.rect / obj.rect by the helpers in this file.
// NOTE(review): the enclosing declaration is not visible in this view - presumably
// these are members of Object; confirm.
cv::Rect_<float> rect;
32
// Per-detection mask coefficients: copied from one row of the "mask" network output
// and later multiplied against the maskmaps channels in detect_yolact.
std::vector<float> maskdata;
36
// Overlap helper for two detections, used by nms_sorted_bboxes for the IoU test.
// NOTE(review): the return statement is not visible in this view - presumably it
// returns the area of `inter`; confirm.
static inline float intersection_area(const Object& a, const Object& b)
38
// operator& on cv::Rect_ yields the rectangle shared by both boxes.
cv::Rect_<float> inter = a.rect & b.rect;
42
// Recursive in-place sort of objects[left..right] keyed on Object::prob.
// NOTE(review): the declarations of i / j and the bodies of the scanning loops are
// not visible in this view; the comments below describe only the visible lines.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
46
// Pivot value: the prob of the element at the midpoint of the range.
float p = objects[(left + right) / 2].prob;
50
// Scan with i while the element has a higher prob than the pivot ...
while (objects[i].prob > p)
53
// ... and with j while the element has a lower prob than the pivot.
while (objects[j].prob < p)
59
// Exchange the pair found by the two scans.
std::swap(objects[i], objects[j]);
66
// The two recursive calls below sit under an OpenMP "parallel sections" pragma.
#pragma omp parallel sections
70
// Recurse into the sub-range ending at j, if it is non-empty.
if (left < j) qsort_descent_inplace(objects, left, j);
74
// Recurse into the sub-range starting at i, if it is non-empty.
if (i < right) qsort_descent_inplace(objects, i, right);
79
// Convenience overload: sorts the whole vector by delegating to the range version.
static void qsort_descent_inplace(std::vector<Object>& objects)
84
// NOTE(review): objects.size() - 1 wraps around for an empty vector (size() is
// unsigned); presumably an empty-check precedes this call on a line not visible
// in this view - confirm.
qsort_descent_inplace(objects, 0, objects.size() - 1);
87
// Non-maximum suppression over `faceobjects`; indices of kept boxes go to `picked`.
//   faceobjects   - candidate detections (the name suggests they are already sorted;
//                   detect_yolact calls qsort_descent_inplace before this)
//   picked        - output list of indices into faceobjects
//   nms_threshold - IoU value above which a candidate is compared as overlapping
//   agnostic      - when false, boxes with different labels are not compared
// NOTE(review): the statements that set/clear the keep flag and push into `picked`
// are not visible in this view.
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
91
const int n = faceobjects.size();
93
// Cache every box area once so the pairwise loop does not recompute it.
std::vector<float> areas(n);
94
for (int i = 0; i < n; i++)
96
areas[i] = faceobjects[i].rect.area();
99
// Visit each candidate in order and compare it with everything picked so far.
for (int i = 0; i < n; i++)
101
const Object& a = faceobjects[i];
104
for (int j = 0; j < (int)picked.size(); j++)
106
const Object& b = faceobjects[picked[j]];
108
// Class-aware mode: a pair with different labels takes this branch
// (its body is not visible here - presumably it skips the comparison).
if (!agnostic && a.label != b.label)
111
// intersection over union
112
float inter_area = intersection_area(a, b);
113
float union_area = areas[i] + areas[picked[j]] - inter_area;
114
// float IoU = inter_area / union_area
115
// Overlap test against the threshold (branch body not visible in this view).
if (inter_area / union_area > nms_threshold)
124
// Runs the YOLACT network on `bgr` and appends the resulting detections, including
// a per-object binary mask, to `objects`.
//   bgr     - input image; its data is passed to ncnn as BGR pixels
//   objects - output detections (rect, maskdata, mask are written here)
// NOTE(review): many lines of this function are not visible in this view (the
// declarations of yolact, maskmaps, location, mask, pb_cx/pb_cy/pb_w/pb_h, label,
// score, obj, z, mask2, and every return statement). Comments below describe only
// what the visible lines do.
static int detect_yolact(const cv::Mat& bgr, std::vector<Object>& objects)
128
// Request Vulkan compute on the net options.
yolact.opt.use_vulkan_compute = true;
130
// original model converted from https://github.com/dbolya/yolact
131
// yolact_resnet50_54_800000.pth
132
// the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
133
// Load the network structure and weights from files in the working directory;
// a non-zero result takes the branch (branch bodies not visible here).
if (yolact.load_param("yolact.param"))
135
if (yolact.load_model("yolact.bin"))
138
// Square network input edge length in pixels.
const int target_size = 550;
140
int img_w = bgr.cols;
141
int img_h = bgr.rows;
143
// Convert BGR -> RGB and resize the full image to target_size x target_size.
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, target_size, target_size);
145
// Per-channel mean and scale passed to substract_mean_normalize below.
const float mean_vals[3] = {123.68f, 116.78f, 103.94f};
146
const float norm_vals[3] = {1.0 / 58.40f, 1.0 / 57.12f, 1.0 / 57.38f};
147
in.substract_mean_normalize(mean_vals, norm_vals);
149
ncnn::Extractor ex = yolact.create_extractor();
151
// Feed the preprocessed image to the blob named "input.1".
ex.input("input.1", in);
156
ncnn::Mat confidence;
158
// Pull the four output blobs by name; the trailing comments give their shapes.
ex.extract("619", maskmaps); // 138x138 x 32
160
ex.extract("816", location); // 4 x 19248
161
ex.extract("818", mask); // maskdim 32 x 19248
162
ex.extract("820", confidence); // 81 x 19248
164
// One confidence row per prior, one column per class.
int num_class = confidence.w;
165
int num_priors = confidence.h;
168
// Prior (anchor) boxes: 4 floats per prior, generated by the loops below.
ncnn::Mat priorbox(4, num_priors);
170
// Feature-map sizes, aspect ratios and scales used to generate the priors.
const int conv_ws[5] = {69, 35, 18, 9, 5};
171
const int conv_hs[5] = {69, 35, 18, 9, 5};
173
const float aspect_ratios[3] = {1.f, 0.5f, 2.f};
174
const float scales[5] = {24.f, 48.f, 96.f, 192.f, 384.f};
176
// Write cursor into priorbox.
float* pb = priorbox;
178
// For each of the 5 feature levels, each cell, and each of the 3 aspect ratios.
for (int p = 0; p < 5; p++)
180
int conv_w = conv_ws[p];
181
int conv_h = conv_hs[p];
183
float scale = scales[p];
185
for (int i = 0; i < conv_h; i++)
187
for (int j = 0; j < conv_w; j++)
189
// +0.5 because priors are in center-size notation
190
// Cell centre expressed as a fraction of the feature-map size.
float cx = (j + 0.5f) / conv_w;
191
float cy = (i + 0.5f) / conv_h;
193
for (int k = 0; k < 3; k++)
195
float ar = aspect_ratios[k];
199
// Prior size as a fraction of the input; the literal 550 has the same value
// as target_size above.
float w = scale * ar / 550;
200
float h = scale / ar / 550;
202
// This is for backward compatibility with a bug where I made everything square by accident
203
// cfg.backbone.use_square_anchors:
218
// Post-processing parameters: minimum score, NMS IoU threshold, max detections kept.
const float confidence_thresh = 0.05f;
219
const float nms_threshold = 0.5f;
220
const int keep_top_k = 200;
222
// One candidate bucket per class index.
std::vector<std::vector<Object> > class_candidates;
223
class_candidates.resize(num_class);
225
// Decode every prior: row i of each output blob belongs to prior i.
for (int i = 0; i < num_priors; i++)
227
const float* conf = confidence.row(i);
228
const float* loc = location.row(i);
229
const float* pb = priorbox.row(i);
230
const float* maskdata = mask.row(i);
232
// find class id with highest score
233
// start from 1 to skip background
236
for (int j = 1; j < num_class; j++)
238
float class_score = conf[j];
239
if (class_score > score)
246
// ignore background or low score
247
if (label == 0 || score <= confidence_thresh)
251
// Scale factors applied to the raw location offsets (centre x/y, then width/height).
float var[4] = {0.1f, 0.1f, 0.2f, 0.2f};
258
// Centre: offset scaled by the prior size, added to the prior centre.
float bbox_cx = var[0] * loc[0] * pb_w + pb_cx;
259
float bbox_cy = var[1] * loc[1] * pb_h + pb_cy;
260
// Size: exponential of the scaled offset times the prior size.
float bbox_w = (float)(exp(var[2] * loc[2]) * pb_w);
261
float bbox_h = (float)(exp(var[3] * loc[3]) * pb_h);
263
// Centre/size -> corner coordinates.
float obj_x1 = bbox_cx - bbox_w * 0.5f;
264
float obj_y1 = bbox_cy - bbox_h * 0.5f;
265
float obj_x2 = bbox_cx + bbox_w * 0.5f;
266
float obj_y2 = bbox_cy + bbox_h * 0.5f;
269
// Scale corners by the source image size and clamp to [0, dim - 1].
obj_x1 = std::max(std::min(obj_x1 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
270
obj_y1 = std::max(std::min(obj_y1 * bgr.rows, (float)(bgr.rows - 1)), 0.f);
271
obj_x2 = std::max(std::min(obj_x2 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
272
obj_y2 = std::max(std::min(obj_y2 * bgr.rows, (float)(bgr.rows - 1)), 0.f);
276
// Width/height get +1, i.e. both corner pixels are counted.
obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
279
// Copy this prior's mask.w mask coefficients into the object.
obj.maskdata = std::vector<float>(maskdata, maskdata + mask.w);
281
class_candidates[label].push_back(obj);
285
// Per class: sort the candidates, run NMS, and append the survivors to `objects`.
for (int i = 0; i < (int)class_candidates.size(); i++)
287
std::vector<Object>& candidates = class_candidates[i];
289
qsort_descent_inplace(candidates);
291
std::vector<int> picked;
292
nms_sorted_bboxes(candidates, picked, nms_threshold);
294
for (int j = 0; j < (int)picked.size(); j++)
297
// NOTE(review): `z` is declared on a line not visible here - presumably picked[j]; confirm.
objects.push_back(candidates[z]);
301
// Sort the merged detections across all classes.
qsort_descent_inplace(objects);
304
// Truncate to at most keep_top_k detections.
if (keep_top_k < (int)objects.size())
306
objects.resize(keep_top_k);
310
// Build a full-image binary mask for every remaining detection.
for (int i = 0; i < (int)objects.size(); i++)
312
Object& obj = objects[i];
314
// Float accumulator at maskmaps resolution, zero-initialised.
cv::Mat mask(maskmaps.h, maskmaps.w, CV_32FC1);
316
mask = cv::Scalar(0.f);
318
// Weighted sum of the maskmaps channels using this object's coefficients.
for (int p = 0; p < maskmaps.c; p++)
320
const float* maskmap = maskmaps.channel(p);
321
float coeff = obj.maskdata[p];
322
float* mp = (float*)mask.data;
325
for (int j = 0; j < maskmaps.w * maskmaps.h; j++)
327
mp[j] += maskmap[j] * coeff;
333
// Resize the accumulated mask to the source image size
// (mask2 is declared on a line not visible here).
cv::resize(mask, mask2, cv::Size(img_w, img_h));
335
// crop obj box and binarize
336
obj.mask = cv::Mat(img_h, img_w, CV_8UC1);
338
obj.mask = cv::Scalar(0);
340
for (int y = 0; y < img_h; y++)
342
// Row outside the box's vertical extent (branch body not visible - presumably skipped).
if (y < obj.rect.y || y > obj.rect.y + obj.rect.height)
345
const float* mp2 = mask2.ptr<const float>(y);
346
uchar* bmp = obj.mask.ptr<uchar>(y);
348
for (int x = 0; x < img_w; x++)
350
// Column outside the box's horizontal extent (branch body not visible - presumably skipped).
if (x < obj.rect.x || x > obj.rect.x + obj.rect.width)
353
// Binarise: values above 0.5 become 255, everything else 0.
bmp[x] = mp2[x] > 0.5f ? 255 : 0;
362
// Draws every detection (box, text label, blended mask) onto a copy of `bgr`,
// logs each detection to stderr, writes the result to "result.png" and shows it
// in a window named "image".
// NOTE(review): the contents of `colors` and the declarations of text, baseLine, x
// and color_index are on lines not visible in this view.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
364
// Label text indexed by Object::label; entry 0 is "background".
static const char* class_names[] = {"background",
365
"person", "bicycle", "car", "motorcycle", "airplane", "bus",
366
"train", "truck", "boat", "traffic light", "fire hydrant",
367
"stop sign", "parking meter", "bench", "bird", "cat", "dog",
368
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
369
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
370
"skis", "snowboard", "sports ball", "kite", "baseball bat",
371
"baseball glove", "skateboard", "surfboard", "tennis racket",
372
"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
373
"banana", "apple", "sandwich", "orange", "broccoli", "carrot",
374
"hot dog", "pizza", "donut", "cake", "chair", "couch",
375
"potted plant", "bed", "dining table", "toilet", "tv", "laptop",
376
"mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
377
"toaster", "sink", "refrigerator", "book", "clock", "vase",
378
"scissors", "teddy bear", "hair drier", "toothbrush"
381
// Palette of 81 three-byte colours; rows are selected with color_index % 81 below.
static const unsigned char colors[81][3] = {
465
// Draw on a copy so the caller's image is left untouched.
cv::Mat image = bgr.clone();
469
for (size_t i = 0; i < objects.size(); i++)
471
const Object& obj = objects[i];
476
// Log label, probability and box geometry.
fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
477
obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
479
const unsigned char* color = colors[color_index % 81];
482
// Box outline in the selected colour.
cv::rectangle(image, obj.rect, cv::Scalar(color[0], color[1], color[2]));
485
// Label text: class name plus probability as a percentage with one decimal.
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
488
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
491
// Place the label above the top edge of the box.
int y = obj.rect.y - label_size.height - baseLine;
494
// Shift the label left when it would run past the right image border.
if (x + label_size.width > image.cols)
495
x = image.cols - label_size.width;
497
// Filled white background behind the text (negative thickness means filled).
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
498
cv::Scalar(255, 255, 255), -1);
500
// Black label text on top of the white background.
cv::putText(image, text, cv::Point(x, y + label_size.height),
501
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
504
// Walk the image row by row alongside the object's mask.
for (int y = 0; y < image.rows; y++)
506
const uchar* mp = obj.mask.ptr(y);
507
uchar* p = image.ptr(y);
508
for (int x = 0; x < image.cols; x++)
512
// 50/50 blend of the pixel with the object colour, per channel.
// NOTE(review): the mask test and pointer advance are not visible in this view.
p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + color[0] * 0.5);
513
p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + color[1] * 0.5);
514
p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + color[2] * 0.5);
521
// Save the annotated image and display it.
cv::imwrite("result.png", image);
522
cv::imshow("image", image);
526
// Entry point: reads the image named by the first command-line argument, runs
// detect_yolact on it and passes the result to draw_objects.
// NOTE(review): the argument-count check, the imread failure check and the return
// statements are on lines not visible in this view.
int main(int argc, char** argv)
530
// Usage message printed to stderr.
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
534
const char* imagepath = argv[1];
536
// Flag 1 asks imread for a 3-channel colour image.
cv::Mat m = cv::imread(imagepath, 1);
539
// Error message for a failed image load.
fprintf(stderr, "cv::imread %s failed\n", imagepath);
543
std::vector<Object> objects;
544
// NOTE(review): the int returned by detect_yolact is discarded here.
detect_yolact(m, objects);
546
draw_objects(m, objects);