ncnn

Форк
0
/
yolov7.cpp 
461 строка · 13.3 Кб
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "layer.h"
#include "net.h"

#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <float.h>
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include <vector>
// Largest stride passed to generate_proposals below; the resized input is padded so that
// both of its dimensions are multiples of this value.
#define MAX_STRIDE 32
struct Object
32
{
33
    cv::Rect_<float> rect;
34
    int label;
35
    float prob;
36
};
static inline float intersection_area(const Object& a, const Object& b)
39
{
40
    cv::Rect_<float> inter = a.rect & b.rect;
41
    return inter.area();
42
}
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
45
{
46
    int i = left;
47
    int j = right;
48
    float p = objects[(left + right) / 2].prob;
49

50
    while (i <= j)
51
    {
52
        while (objects[i].prob > p)
53
            i++;
54

55
        while (objects[j].prob < p)
56
            j--;
57

58
        if (i <= j)
59
        {
60
            // swap
61
            std::swap(objects[i], objects[j]);
62

63
            i++;
64
            j--;
65
        }
66
    }
67

68
    #pragma omp parallel sections
69
    {
70
        #pragma omp section
71
        {
72
            if (left < j) qsort_descent_inplace(objects, left, j);
73
        }
74
        #pragma omp section
75
        {
76
            if (i < right) qsort_descent_inplace(objects, i, right);
77
        }
78
    }
79
}
static void qsort_descent_inplace(std::vector<Object>& objects)
82
{
83
    if (objects.empty())
84
        return;
85

86
    qsort_descent_inplace(objects, 0, objects.size() - 1);
87
}
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
90
{
91
    picked.clear();
92

93
    const int n = faceobjects.size();
94

95
    std::vector<float> areas(n);
96
    for (int i = 0; i < n; i++)
97
    {
98
        areas[i] = faceobjects[i].rect.area();
99
    }
100

101
    for (int i = 0; i < n; i++)
102
    {
103
        const Object& a = faceobjects[i];
104

105
        int keep = 1;
106
        for (int j = 0; j < (int)picked.size(); j++)
107
        {
108
            const Object& b = faceobjects[picked[j]];
109

110
            if (!agnostic && a.label != b.label)
111
                continue;
112

113
            // intersection over union
114
            float inter_area = intersection_area(a, b);
115
            float union_area = areas[i] + areas[picked[j]] - inter_area;
116
            // float IoU = inter_area / union_area
117
            if (inter_area / union_area > nms_threshold)
118
                keep = 0;
119
        }
120

121
        if (keep)
122
            picked.push_back(i);
123
    }
124
}
// Logistic function 1 / (1 + e^-x), evaluated in single precision.
// std::exp is spelled out so the float overload is always selected, instead of
// depending on which math declarations happen to be visible in the global namespace.
static inline float sigmoid(float x)
{
    return 1.f / (1.f + std::exp(-x));
}
// Decodes the raw output of one detection head into candidate boxes.
//
// anchors         flat list of (width, height) pairs; pair q is used with channel q of feat_blob
// stride          divisor that maps in_pad's pixel size to grid cells for this head
// in_pad          padded network input; only its w and h are read, to recover the grid shape
// feat_blob       head output: one channel per anchor, one row per grid cell, each row laid
//                 out as [x, y, w, h, objectness, class scores...] before any sigmoid
// prob_threshold  minimum score, applied to the objectness first and then to
//                 objectness * best class score
// objects         output: accepted candidates are appended, boxes in in_pad pixel coordinates
//
// Nothing is appended when stride or the derived grid size is not positive.
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    // a non-positive stride would make the grid computation below divide by zero
    if (stride <= 0)
        return;

    const int num_grid = feat_blob.h;

    // feat_blob stores the grid flattened into rows; rebuild its 2-D shape from
    // the longer side of the input
    int num_grid_x;
    int num_grid_y;
    if (in_pad.w > in_pad.h)
    {
        num_grid_x = in_pad.w / stride;
        if (num_grid_x <= 0)
            return;
        num_grid_y = num_grid / num_grid_x;
    }
    else
    {
        num_grid_y = in_pad.h / stride;
        if (num_grid_y <= 0)
            return;
        num_grid_x = num_grid / num_grid_y;
    }

    const int num_class = feat_blob.w - 5;

    const int num_anchors = anchors.w / 2;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(q);

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                const float* featptr = feat.row(i * num_grid_x + j);
                float box_confidence = sigmoid(featptr[4]);
                if (box_confidence >= prob_threshold)
                {
                    // find class index with max class score; the comparison is done on the
                    // raw scores, the sigmoid is applied to the winner only
                    int class_index = 0;
                    float class_score = -FLT_MAX;
                    for (int k = 0; k < num_class; k++)
                    {
                        float score = featptr[5 + k];
                        if (score > class_score)
                        {
                            class_index = k;
                            class_score = score;
                        }
                    }
                    float confidence = box_confidence * sigmoid(class_score);
                    if (confidence >= prob_threshold)
                    {
                        float dx = sigmoid(featptr[0]);
                        float dy = sigmoid(featptr[1]);
                        float dw = sigmoid(featptr[2]);
                        float dh = sigmoid(featptr[3]);

                        // centre: offset in (-0.5, 1.5) cells relative to cell (j, i)
                        float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                        float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                        // size: (2 * sigmoid)^2 times the anchor, squared by plain multiplication
                        const float sw = dw * 2.f;
                        const float sh = dh * 2.f;
                        float pb_w = sw * sw * anchor_w;
                        float pb_h = sh * sh * anchor_h;

                        float x0 = pb_cx - pb_w * 0.5f;
                        float y0 = pb_cy - pb_h * 0.5f;
                        float x1 = pb_cx + pb_w * 0.5f;
                        float y1 = pb_cy + pb_h * 0.5f;

                        Object obj;
                        obj.rect.x = x0;
                        obj.rect.y = y0;
                        obj.rect.width = x1 - x0;
                        obj.rect.height = y1 - y0;
                        obj.label = class_index;
                        obj.prob = confidence;

                        objects.push_back(obj);
                    }
                }
            }
        }
    }
}
// Runs the yolov7-tiny network on an image and collects the detections that survive NMS.
//
// bgr      input image; its pixels are converted BGR -> RGB when loaded into the network input
// objects  output: cleared first, then filled with boxes in original-image pixel
//          coordinates, in descending score order
// returns  0 on success, -1 when the image is empty, the model files cannot be loaded,
//          or the input/output blobs cannot be set or extracted
//
// The network is constructed and both model files are loaded on every call.
static int detect_yolov7(const cv::Mat& bgr, std::vector<Object>& objects)
{
    objects.clear();

    if (bgr.empty())
    {
        fprintf(stderr, "detect_yolov7: empty input image\n");
        return -1;
    }

    ncnn::Net yolov7;

    yolov7.opt.use_vulkan_compute = true;
    // yolov7.opt.use_bf16_storage = true;

    // original pretrained model from https://github.com/WongKinYiu/yolov7
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    if (yolov7.load_param("yolov7-tiny.param") != 0 || yolov7.load_model("yolov7-tiny.bin") != 0)
    {
        fprintf(stderr, "detect_yolov7: failed to load yolov7-tiny.param / yolov7-tiny.bin\n");
        return -1;
    }

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // letterbox pad to multiple of MAX_STRIDE:
    // scale the longer side to target_size and keep the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    // the float -> int truncation above can reach 0 for extreme aspect ratios
    if (w < 1) w = 1;
    if (h < 1) h = 1;

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    int wpad = (w + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - w;
    int hpad = (h + MAX_STRIDE - 1) / MAX_STRIDE * MAX_STRIDE - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov7.create_extractor();

    if (ex.input("images", in_pad) != 0)
    {
        fprintf(stderr, "detect_yolov7: failed to set input blob\n");
        return -1;
    }

    // one entry per detection head: output blob name, stride, three (w, h) anchor pairs
    struct Head
    {
        const char* blob;
        int stride;
        float anchors[6];
    };
    static const Head heads[3] = {
        {"output", 8, {12.f, 16.f, 19.f, 36.f, 40.f, 28.f}},
        {"288", 16, {36.f, 75.f, 76.f, 55.f, 72.f, 146.f}},
        {"302", 32, {142.f, 110.f, 192.f, 243.f, 459.f, 401.f}}
    };

    std::vector<Object> proposals;

    for (int k = 0; k < 3; k++)
    {
        ncnn::Mat out;
        if (ex.extract(heads[k].blob, out) != 0)
        {
            fprintf(stderr, "detect_yolov7: failed to extract blob %s\n", heads[k].blob);
            return -1;
        }

        ncnn::Mat anchors(6);
        for (int a = 0; a < 6; a++)
        {
            anchors[a] = heads[k].anchors[a];
        }

        // generate_proposals appends, so every head can write into the same vector
        generate_proposals(anchors, heads[k].stride, in_pad, out, prob_threshold, proposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}
// Draws every detection on a copy of the image (box plus a "<class> <score>%" tag),
// logs each detection to stderr, then shows the result in a window and waits for a key.
//
// bgr      image the detections refer to; it is cloned, not modified
// objects  detections to draw; a label outside the class-name table is shown as "unknown"
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    static const int num_class_names = sizeof(class_names) / sizeof(class_names[0]);

    static const unsigned char colors[19][3] = {
        {54, 67, 244},
        {99, 30, 233},
        {176, 39, 156},
        {183, 58, 103},
        {181, 81, 63},
        {243, 150, 33},
        {244, 169, 3},
        {212, 188, 0},
        {136, 150, 0},
        {80, 175, 76},
        {74, 195, 139},
        {57, 220, 205},
        {59, 235, 255},
        {7, 193, 255},
        {0, 152, 255},
        {34, 87, 255},
        {72, 85, 121},
        {158, 158, 158},
        {139, 125, 96}
    };
    static const size_t num_colors = sizeof(colors) / sizeof(colors[0]);

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        // cycle through the palette, one colour per detection
        const unsigned char* color = colors[i % num_colors];

        cv::Scalar cc(color[0], color[1], color[2]);

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cc, 2);

        // never index the name table with a label it does not contain
        const char* name = (obj.label >= 0 && obj.label < num_class_names) ? class_names[obj.label] : "unknown";

        char text[256];
        sprintf(text, "%s %.1f%%", name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // put the tag just above the box, moved back inside the image if it would stick out
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cc, -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 255, 255));
    }

    cv::imshow("image", image);
    cv::waitKey(0);
}
int main(int argc, char** argv)
439
{
440
    if (argc != 2)
441
    {
442
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
443
        return -1;
444
    }
445

446
    const char* imagepath = argv[1];
447

448
    cv::Mat m = cv::imread(imagepath, 1);
449
    if (m.empty())
450
    {
451
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
452
        return -1;
453
    }
454

455
    std::vector<Object> objects;
456
    detect_yolov7(m, objects);
457

458
    draw_objects(m, objects);
459

460
    return 0;
461
}
Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.