// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
18
#if defined(USE_NCNN_SIMPLEOCV)
21
#include <opencv2/core/core.hpp>
22
#include <opencv2/highgui/highgui.hpp>
23
#include <opencv2/imgproc/imgproc.hpp>
33
cv::Rect_<float> rect;
38
static inline float intersection_area(const Object& a, const Object& b)
40
cv::Rect_<float> inter = a.rect & b.rect;
44
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
48
float p = objects[(left + right) / 2].prob;
52
while (objects[i].prob > p)
55
while (objects[j].prob < p)
61
std::swap(objects[i], objects[j]);
68
#pragma omp parallel sections
72
if (left < j) qsort_descent_inplace(objects, left, j);
76
if (i < right) qsort_descent_inplace(objects, i, right);
81
static void qsort_descent_inplace(std::vector<Object>& objects)
86
qsort_descent_inplace(objects, 0, objects.size() - 1);
89
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
93
const int n = faceobjects.size();
95
std::vector<float> areas(n);
96
for (int i = 0; i < n; i++)
98
areas[i] = faceobjects[i].rect.area();
101
for (int i = 0; i < n; i++)
103
const Object& a = faceobjects[i];
106
for (int j = 0; j < (int)picked.size(); j++)
108
const Object& b = faceobjects[picked[j]];
110
if (!agnostic && a.label != b.label)
113
// intersection over union
114
float inter_area = intersection_area(a, b);
115
float union_area = areas[i] + areas[picked[j]] - inter_area;
116
// float IoU = inter_area / union_area
117
if (inter_area / union_area > nms_threshold)
// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + exp(-x)));
}
131
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
133
const int num_grid = feat_blob.h;
137
if (in_pad.w > in_pad.h)
139
num_grid_x = in_pad.w / stride;
140
num_grid_y = num_grid / num_grid_x;
144
num_grid_y = in_pad.h / stride;
145
num_grid_x = num_grid / num_grid_y;
148
const int num_class = feat_blob.w - 5;
150
const int num_anchors = anchors.w / 2;
152
for (int q = 0; q < num_anchors; q++)
154
const float anchor_w = anchors[q * 2];
155
const float anchor_h = anchors[q * 2 + 1];
157
const ncnn::Mat feat = feat_blob.channel(q);
159
for (int i = 0; i < num_grid_y; i++)
161
for (int j = 0; j < num_grid_x; j++)
163
const float* featptr = feat.row(i * num_grid_x + j);
164
float box_confidence = sigmoid(featptr[4]);
165
if (box_confidence >= prob_threshold)
167
// find class index with max class score
169
float class_score = -FLT_MAX;
170
for (int k = 0; k < num_class; k++)
172
float score = featptr[5 + k];
173
if (score > class_score)
179
float confidence = box_confidence * sigmoid(class_score);
180
if (confidence >= prob_threshold)
182
float dx = sigmoid(featptr[0]);
183
float dy = sigmoid(featptr[1]);
184
float dw = sigmoid(featptr[2]);
185
float dh = sigmoid(featptr[3]);
187
float pb_cx = (dx * 2.f - 0.5f + j) * stride;
188
float pb_cy = (dy * 2.f - 0.5f + i) * stride;
190
float pb_w = pow(dw * 2.f, 2) * anchor_w;
191
float pb_h = pow(dh * 2.f, 2) * anchor_h;
193
float x0 = pb_cx - pb_w * 0.5f;
194
float y0 = pb_cy - pb_h * 0.5f;
195
float x1 = pb_cx + pb_w * 0.5f;
196
float y1 = pb_cy + pb_h * 0.5f;
201
obj.rect.width = x1 - x0;
202
obj.rect.height = y1 - y0;
203
obj.label = class_index;
204
obj.prob = confidence;
206
objects.push_back(obj);
214
// Runs the yolov7-tiny ncnn model on a BGR image and stores the final
// detections in objects, with boxes expressed in the coordinates of bgr.
//
// Steps visible below: load the model, resize and pad the image, extract three
// output heads (strides 8, 16, 32), decode them into proposals, sort by score,
// apply NMS, then map the kept boxes back to the original image and clamp them.
//
// NOTE(review): this listing is incomplete. The declarations of yolov7, w, h,
// scale, in_pad and out, the anchor values, and the braces/return statement are
// not present here; the stray numeric lines are not valid C++ either.
static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects)
218
yolov7.opt.use_vulkan_compute = true;
219
// yolov7.opt.use_bf16_storage = true;
221
// original pretrained model from https://github.com/WongKinYiu/yolov7
222
// the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
223
// NOTE(review): the results of load_param / load_model are not checked here.
yolov7.load_param("yolov7-tiny.param");
224
yolov7.load_model("yolov7-tiny.bin");
226
const int target_size = 640;
227
const float prob_threshold = 0.25f;
228
const float nms_threshold = 0.45f;
230
int img_w = bgr.cols;
231
int img_h = bgr.rows;
233
// letterbox pad to multiple of MAX_STRIDE
239
// presumably the two scale assignments below are the two arms of a
// "which side is longer" branch that is not visible here - TODO confirm
scale = (float)target_size / w;
245
scale = (float)target_size / h;
250
// convert BGR to RGB while resizing from img_w x img_h to w x h
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);
252
// padding needed to round w and h up to the next multiple of MAX_STRIDE
int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
253
int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
255
// split the padding between the two sides of each axis; border value is 114
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
257
// no mean is passed (0); every channel is multiplied by 1/255
const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
258
in_pad.substract_mean_normalize(0, norm_vals);
260
ncnn::Extractor ex = yolov7.create_extractor();
262
ex.input("images", in_pad);
264
// candidates gathered from all three heads before sorting and NMS
std::vector<Object> proposals;
269
// head decoded with stride 8
ex.extract("output", out);
271
// six floats = three (width, height) anchor pairs; values are set in lines not shown here
ncnn::Mat anchors(6);
279
std::vector<Object> objects8;
280
generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);
282
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
289
// head decoded with stride 16
ex.extract("288", out);
291
ncnn::Mat anchors(6);
299
std::vector<Object> objects16;
300
generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);
302
proposals.insert(proposals.end(), objects16.begin(), objects16.end());
309
// head decoded with stride 32
ex.extract("302", out);
311
ncnn::Mat anchors(6);
319
std::vector<Object> objects32;
320
generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);
322
proposals.insert(proposals.end(), objects32.begin(), objects32.end());
325
// sort all proposals by score from highest to lowest
326
qsort_descent_inplace(proposals);
328
// apply nms with nms_threshold
329
std::vector<int> picked;
330
nms_sorted_bboxes(proposals, picked, nms_threshold);
332
int count = picked.size();
334
objects.resize(count);
335
for (int i = 0; i < count; i++)
337
objects[i] = proposals[picked[i]];
339
// adjust offset to original unpadded
// (remove the leading half of the padding, then undo the resize scale)
340
float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
341
float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
342
float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
343
float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
346
// clamp both corners to [0, img_w - 1] x [0, img_h - 1]
x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
347
y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
348
x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
349
y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
351
// store the box back as origin + size
objects[i].rect.x = x0;
352
objects[i].rect.y = y0;
353
objects[i].rect.width = x1 - x0;
354
objects[i].rect.height = y1 - y0;
360
// Draws every detection (box plus "name score%" label) on a copy of bgr, logs
// each detection to stderr, and shows the result in a window titled "image".
//
// NOTE(review): this listing is incomplete. The closing of class_names, the
// entries of colors, the declarations of color_index, text, baseLine and x, and
// the braces are not present here; the stray numeric lines are not valid C++.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
362
// display names, indexed by Object::label (presumably the COCO label set - TODO confirm)
static const char* class_names[] = {
363
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
364
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
365
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
366
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
367
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
368
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
369
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
370
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
371
"hair drier", "toothbrush"
374
// palette of 19 three-channel colours; the entries are not shown in this listing
static const unsigned char colors[19][3] = {
398
// draw on a copy so the caller's image is left untouched
cv::Mat image = bgr.clone();
400
for (size_t i = 0; i < objects.size(); i++)
402
const Object& obj = objects[i];
404
// wrap around the 19-entry palette
const unsigned char* color = colors[color_index % 19];
407
cv::Scalar cc(color[0], color[1], color[2]);
409
// log label, score and box (x y width x height) to stderr
fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
410
obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
412
cv::rectangle(image, obj.rect, cc, 2);
415
// NOTE(review): class_names[obj.label] is not range-checked, and sprintf
// does not bound the write into text - consider snprintf.
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
418
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
421
// place the label just above the top edge of the box
int y = obj.rect.y - label_size.height - baseLine;
424
// shift the label left when it would run past the right edge of the image
if (x + label_size.width > image.cols)
425
x = image.cols - label_size.width;
427
// background rectangle behind the label text
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
430
// label text in white
cv::putText(image, text, cv::Point(x, y + label_size.height),
431
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255));
434
cv::imshow("image", image);
438
int main(int argc, char** argv)
442
fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
446
const char* imagepath = argv[1];
448
cv::Mat m = cv::imread(imagepath, 1);
451
fprintf(stderr, "cv::imread %s failed\n", imagepath);
455
std::vector<Object> objects;
456
detect_yolov7(m, objects);
458
draw_objects(m, objects);