// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "layer.h"
#include "net.h"

#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <vector>
// A single detection result: bounding box, class label and confidence.
struct Object
{
    cv::Rect_<float> rect; // box in image coordinates (x, y, width, height)
    int label;             // class index (used to index class_names in draw_objects)
    float prob;            // confidence score
};
static inline float intersection_area(const Object& a, const Object& b)
38
cv::Rect_<float> inter = a.rect & b.rect;
42
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
46
float p = faceobjects[(left + right) / 2].prob;
50
while (faceobjects[i].prob > p)
53
while (faceobjects[j].prob < p)
59
std::swap(faceobjects[i], faceobjects[j]);
66
#pragma omp parallel sections
70
if (left < j) qsort_descent_inplace(faceobjects, left, j);
74
if (i < right) qsort_descent_inplace(faceobjects, i, right);
79
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
81
if (faceobjects.empty())
84
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
87
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
91
const int n = faceobjects.size();
93
std::vector<float> areas(n);
94
for (int i = 0; i < n; i++)
96
areas[i] = faceobjects[i].rect.area();
99
for (int i = 0; i < n; i++)
101
const Object& a = faceobjects[i];
104
for (int j = 0; j < (int)picked.size(); j++)
106
const Object& b = faceobjects[picked[j]];
108
if (!agnostic && a.label != b.label)
111
// intersection over union
112
float inter_area = intersection_area(a, b);
113
float union_area = areas[i] + areas[picked[j]] - inter_area;
114
// float IoU = inter_area / union_area
115
if (inter_area / union_area > nms_threshold)
124
// Logistic sigmoid: maps any real x into (0, 1) via 1 / (1 + e^-x).
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}
// Decode one YOLOv5 detection-head output into box proposals.
//
// anchors        - 2*num_anchors values, (w, h) pairs for this head
// stride         - feature map stride in pixels (8, 16 or 32)
// in_pad         - letterboxed network input; unused here, parameter kept for
//                  interface compatibility with callers
// feat_blob      - raw head output; channels hold num_anchors blocks of
//                  (x, y, w, h, objectness, num_class class scores)
// prob_threshold - minimum sigmoid(obj) * sigmoid(best class) to keep a proposal
// objects        - output; proposals appended in coordinates of the padded image
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    (void)in_pad; // not needed: grid size is taken from feat_blob itself

    const int num_grid_x = feat_blob.w;
    const int num_grid_y = feat_blob.h;

    const int num_anchors = anchors.w / 2;

    // channels = num_anchors * (4 box + 1 objectness + num_class scores)
    const int num_class = feat_blob.c / num_anchors - 5;

    const int feat_offset = num_class + 5;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                // find class index with max class score
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int k = 0; k < num_class; k++)
                {
                    float score = feat_blob.channel(q * feat_offset + 5 + k).row(i)[j];
                    if (score > class_score)
                    {
                        class_index = k;
                        class_score = score;
                    }
                }

                float box_score = feat_blob.channel(q * feat_offset + 4).row(i)[j];

                // confidence = objectness * best class probability (both through sigmoid)
                float confidence = sigmoid(box_score) * sigmoid(class_score);

                if (confidence >= prob_threshold)
                {
                    // yolov5/models/yolo.py Detect forward
                    // y = x[i].sigmoid()
                    // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
                    // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
                    float dx = sigmoid(feat_blob.channel(q * feat_offset + 0).row(i)[j]);
                    float dy = sigmoid(feat_blob.channel(q * feat_offset + 1).row(i)[j]);
                    float dw = sigmoid(feat_blob.channel(q * feat_offset + 2).row(i)[j]);
                    float dh = sigmoid(feat_blob.channel(q * feat_offset + 3).row(i)[j]);

                    float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                    float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                    float pb_w = pow(dw * 2.f, 2) * anchor_w;
                    float pb_h = pow(dh * 2.f, 2) * anchor_h;

                    // center/size -> top-left / bottom-right corners
                    float x0 = pb_cx - pb_w * 0.5f;
                    float y0 = pb_cy - pb_h * 0.5f;
                    float x1 = pb_cx + pb_w * 0.5f;
                    float y1 = pb_cy + pb_h * 0.5f;

                    Object obj;
                    obj.rect.x = x0;
                    obj.rect.y = y0;
                    obj.rect.width = x1 - x0;
                    obj.rect.height = y1 - y0;
                    obj.label = class_index;
                    obj.prob = confidence;

                    objects.push_back(obj);
                }
            }
        }
    }
}
// Run the yolov5s ncnn model on a BGR image and fill `objects` with the final
// NMS-filtered detections in original-image coordinates.
// Returns 0 on success; exits the process if the model files cannot be loaded.
static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolov5;

    yolov5.opt.use_vulkan_compute = true;
    // yolov5.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/ultralytics/yolov5
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (yolov5.load_param("yolov5s.ncnn.param"))
        exit(-1);
    if (yolov5.load_model("yolov5s.ncnn.bin"))
        exit(-1);

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // yolov5/models/common.py DetectMultiBackend
    const int max_stride = 64;

    // letterbox pad to multiple of max_stride:
    // scale the longer side to target_size, keep aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    // pad to target_size rectangle
    // yolov5/utils/datasets.py letterbox
    int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
    int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
    ncnn::Mat in_pad;
    // 114 is the gray letterbox fill used by ultralytics
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // normalize pixel values to [0, 1]
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov5.create_extractor();

    ex.input("in0", in_pad);

    std::vector<Object> proposals;

    // anchor setting from yolov5/models/yolov5s.yaml

    // stride 8
    {
        ncnn::Mat out;
        ex.extract("out0", out);

        ncnn::Mat anchors(6);
        anchors[0] = 10.f;
        anchors[1] = 13.f;
        anchors[2] = 16.f;
        anchors[3] = 30.f;
        anchors[4] = 33.f;
        anchors[5] = 23.f;

        std::vector<Object> objects8;
        generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);

        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
    }

    // stride 16
    {
        ncnn::Mat out;
        ex.extract("out1", out);

        ncnn::Mat anchors(6);
        anchors[0] = 30.f;
        anchors[1] = 61.f;
        anchors[2] = 62.f;
        anchors[3] = 45.f;
        anchors[4] = 59.f;
        anchors[5] = 119.f;

        std::vector<Object> objects16;
        generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);

        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
    }

    // stride 32
    {
        ncnn::Mat out;
        ex.extract("out2", out);

        ncnn::Mat anchors(6);
        anchors[0] = 116.f;
        anchors[1] = 90.f;
        anchors[2] = 156.f;
        anchors[3] = 198.f;
        anchors[4] = 373.f;
        anchors[5] = 326.f;

        std::vector<Object> objects32;
        generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);

        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded image, undoing letterbox + scale
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip to image bounds
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Print each detection to stderr and display them drawn on a copy of the image.
// Blocks until a key is pressed in the display window.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // COCO class names, indexed by Object::label
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        char text[256];
        // snprintf rather than sprintf: bounds the write to the buffer
        snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // keep the label box inside the image
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
410
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
414
const char* imagepath = argv[1];
416
cv::Mat m = cv::imread(imagepath, 1);
419
fprintf(stderr, "cv::imread %s failed\n", imagepath);
423
std::vector<Object> objects;
424
detect_yolov5(m, objects);
426
draw_objects(m, objects);