20
#include <emscripten.h>
25
#include "datareader.h"
33
class DataReaderFromEmpty : public ncnn::DataReader
36
virtual int scan(const char* format, void* p) const
40
virtual size_t read(void* buf, size_t size) const
47
static int g_warmup_loop_count = 8;
48
static int g_loop_count = 4;
49
static bool g_enable_cooling_down = true;
51
static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
52
static ncnn::PoolAllocator g_workspace_pool_allocator;
55
static ncnn::VulkanDevice* g_vkdev = 0;
56
static ncnn::VkAllocator* g_blob_vkallocator = 0;
57
static ncnn::VkAllocator* g_staging_vkallocator = 0;
60
// Load the model named `comment` (param file from MODEL_DIR when fixed_path
// is true, otherwise `comment` is used as the literal param path), run it on
// the given input tensors and print min/max/avg latency in milliseconds over
// g_loop_count timed runs, after g_warmup_loop_count untimed warmup runs.
void benchmark(const char* comment, const std::vector<ncnn::Mat>& _in, const ncnn::Option& opt, bool fixed_path = true)
{
    ncnn::Net net;

    net.opt = opt;

    // Reset pooled buffers so each model starts from a clean slate.
    g_blob_pool_allocator.clear();
    g_workspace_pool_allocator.clear();

    if (opt.use_vulkan_compute)
    {
        g_blob_vkallocator->clear();
        g_staging_vkallocator->clear();
    }

    if (net.opt.use_vulkan_compute)
    {
        net.set_vulkan_device(g_vkdev);
    }

// Model files are exposed to the wasm runtime under this mount point
// (mounted from the host working directory in main()).
#define MODEL_DIR "/working/"

    if (fixed_path)
    {
        char parampath[256];
        sprintf(parampath, MODEL_DIR "%s.param", comment);
        net.load_param(parampath);
    }
    else
    {
        net.load_param(comment);
    }

    // Weights are irrelevant for a speed measurement - feed zeros.
    DataReaderFromEmpty dr;
    net.load_model(dr);

    const std::vector<const char*>& input_names = net.input_names();
    const std::vector<const char*>& output_names = net.output_names();

    if (g_enable_cooling_down)
    {
        // Sleep 10 seconds so the SoC cools down between models.
        ncnn::sleep(10 * 1000);
    }

    if (input_names.size() > _in.size())
    {
        fprintf(stderr, "input %ld tensors while model has %ld inputs\n", _in.size(), input_names.size());
        return;
    }

    // Fill the inputs with a constant.  NOTE(review): ncnn::Mat copies are
    // refcounted shallow copies, so fill() presumably writes through to the
    // caller's buffers - confirm against ncnn::Mat semantics.
    for (size_t j = 0; j < input_names.size(); ++j)
    {
        ncnn::Mat in = _in[j];
        in.fill(0.01f);
    }

    // Warm up: untimed runs to populate caches/allocators.
    for (int i = 0; i < g_warmup_loop_count; i++)
    {
        ncnn::Extractor ex = net.create_extractor();
        for (size_t j = 0; j < input_names.size(); ++j)
        {
            ncnn::Mat in = _in[j];
            ex.input(input_names[j], in);
        }

        for (size_t j = 0; j < output_names.size(); ++j)
        {
            ncnn::Mat out;
            ex.extract(output_names[j], out);
        }
    }

    double time_min = DBL_MAX;
    double time_max = -DBL_MAX;
    double time_avg = 0;

    for (int i = 0; i < g_loop_count; i++)
    {
        double start = ncnn::get_current_time();

        // Scope the extractor so its teardown is inside the timed region,
        // matching one full inference round-trip.
        {
            ncnn::Extractor ex = net.create_extractor();
            for (size_t j = 0; j < input_names.size(); ++j)
            {
                ncnn::Mat in = _in[j];
                ex.input(input_names[j], in);
            }

            for (size_t j = 0; j < output_names.size(); ++j)
            {
                ncnn::Mat out;
                ex.extract(output_names[j], out);
            }
        }

        double end = ncnn::get_current_time();

        double time = end - start;

        time_min = std::min(time_min, time);
        time_max = std::max(time_max, time);
        time_avg += time;
    }

    time_avg /= g_loop_count;

    fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", comment, time_min, time_max, time_avg);
}
// Convenience overload: benchmark a model that takes a single input tensor.
// Wraps the tensor in a vector and forwards to the multi-input overload.
void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& opt, bool fixed_path = true)
{
    std::vector<ncnn::Mat> inputs;
    inputs.push_back(_in);
    return benchmark(comment, inputs, opt, fixed_path);
}
// Print command-line help to stderr.
// NOTE(review): the definition line was lost in this mangled copy; the name
// `show_usage` is reconstructed - confirm against the -h handler in main().
static void show_usage()
{
    fprintf(stderr, "Usage: benchncnn [loop count] [num threads] [powersave] [gpu device] [cooling down] [(key=value)...]\n");
    fprintf(stderr, " param=model.param\n");
    fprintf(stderr, " shape=[227,227,3],...\n");
}
static std::vector<ncnn::Mat> parse_shape_list(char* s)
195
std::vector<std::vector<int> > shapes;
196
std::vector<ncnn::Mat> mats;
198
char* pch = strtok(s, "[]");
204
int nscan = sscanf(pch, "%d%n", &v, &nconsumed);
213
nscan = sscanf(pch, ",%d%n", &v, &nconsumed);
220
nscan = sscanf(pch, ",%d%n", &v, &nconsumed);
227
pch = strtok(NULL, "[]");
230
for (size_t i = 0; i < shapes.size(); ++i)
232
const std::vector<int>& shape = shapes[i];
233
switch (shape.size())
236
mats.push_back(ncnn::Mat(shape[0], shape[1], shape[2], shape[3]));
239
mats.push_back(ncnn::Mat(shape[0], shape[1], shape[2]));
242
mats.push_back(ncnn::Mat(shape[0], shape[1]));
245
mats.push_back(ncnn::Mat(shape[0]));
248
fprintf(stderr, "unsupported input shape size %ld\n", shape.size());
255
int main(int argc, char** argv)
258
int num_threads = ncnn::get_physical_big_cpu_count();
261
int cooling_down = 1;
263
std::vector<ncnn::Mat> inputs;
265
for (int i = 1; i < argc; i++)
267
if (argv[i][0] == '-' && argv[i][1] == 'h')
273
if (strcmp(argv[i], "--help") == 0)
282
loop_count = atoi(argv[1]);
286
num_threads = atoi(argv[2]);
290
powersave = atoi(argv[3]);
294
gpu_device = atoi(argv[4]);
298
cooling_down = atoi(argv[5]);
301
for (int i = 6; i < argc; i++)
306
char* eqs = strchr(kv, '=');
309
fprintf(stderr, "unrecognized arg %s\n", kv);
315
const char* key = kv;
316
char* value = eqs + 1;
318
if (strcmp(key, "param") == 0)
320
if (strcmp(key, "shape") == 0)
321
inputs = parse_shape_list(value);
324
if (model && inputs.empty())
326
fprintf(stderr, "input tensor shape empty!\n");
332
FS.mkdir('/working');
333
FS.mount(NODEFS, {root: '.'}, '/working'););
336
bool use_vulkan_compute = gpu_device != -1;
338
g_enable_cooling_down = cooling_down != 0;
340
g_loop_count = loop_count;
342
g_blob_pool_allocator.set_size_compare_ratio(0.f);
343
g_workspace_pool_allocator.set_size_compare_ratio(0.f);
346
if (use_vulkan_compute)
348
g_warmup_loop_count = 10;
350
g_vkdev = ncnn::get_gpu_device(gpu_device);
352
g_blob_vkallocator = new ncnn::VkBlobAllocator(g_vkdev);
353
g_staging_vkallocator = new ncnn::VkStagingAllocator(g_vkdev);
357
ncnn::set_cpu_powersave(powersave);
359
ncnn::set_omp_dynamic(0);
360
ncnn::set_omp_num_threads(num_threads);
364
opt.lightmode = true;
365
opt.num_threads = num_threads;
366
opt.blob_allocator = &g_blob_pool_allocator;
367
opt.workspace_allocator = &g_workspace_pool_allocator;
369
opt.blob_vkallocator = g_blob_vkallocator;
370
opt.workspace_vkallocator = g_blob_vkallocator;
371
opt.staging_vkallocator = g_staging_vkallocator;
373
opt.use_winograd_convolution = true;
374
opt.use_sgemm_convolution = true;
375
opt.use_int8_inference = true;
376
opt.use_vulkan_compute = use_vulkan_compute;
377
opt.use_fp16_packed = true;
378
opt.use_fp16_storage = true;
379
opt.use_fp16_arithmetic = true;
380
opt.use_int8_storage = true;
381
opt.use_int8_arithmetic = true;
382
opt.use_packing_layout = true;
383
opt.use_shader_pack8 = false;
384
opt.use_image_storage = false;
386
fprintf(stderr, "loop_count = %d\n", g_loop_count);
387
fprintf(stderr, "num_threads = %d\n", num_threads);
388
fprintf(stderr, "powersave = %d\n", ncnn::get_cpu_powersave());
389
fprintf(stderr, "gpu_device = %d\n", gpu_device);
390
fprintf(stderr, "cooling_down = %d\n", (int)g_enable_cooling_down);
395
benchmark(model, inputs, opt, false);
400
benchmark("squeezenet", ncnn::Mat(227, 227, 3), opt);
402
benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3), opt);
404
benchmark("mobilenet", ncnn::Mat(224, 224, 3), opt);
406
benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3), opt);
408
benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3), opt);
412
benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3), opt);
414
benchmark("shufflenet", ncnn::Mat(224, 224, 3), opt);
416
benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3), opt);
418
benchmark("mnasnet", ncnn::Mat(224, 224, 3), opt);
420
benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3), opt);
422
benchmark("efficientnet_b0", ncnn::Mat(224, 224, 3), opt);
424
benchmark("efficientnetv2_b0", ncnn::Mat(224, 224, 3), opt);
426
benchmark("regnety_400m", ncnn::Mat(224, 224, 3), opt);
428
benchmark("blazeface", ncnn::Mat(128, 128, 3), opt);
430
benchmark("googlenet", ncnn::Mat(224, 224, 3), opt);
432
benchmark("googlenet_int8", ncnn::Mat(224, 224, 3), opt);
434
benchmark("resnet18", ncnn::Mat(224, 224, 3), opt);
436
benchmark("resnet18_int8", ncnn::Mat(224, 224, 3), opt);
438
benchmark("alexnet", ncnn::Mat(227, 227, 3), opt);
440
benchmark("vgg16", ncnn::Mat(224, 224, 3), opt);
442
benchmark("vgg16_int8", ncnn::Mat(224, 224, 3), opt);
444
benchmark("resnet50", ncnn::Mat(224, 224, 3), opt);
446
benchmark("resnet50_int8", ncnn::Mat(224, 224, 3), opt);
448
benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3), opt);
450
benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3), opt);
452
benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3), opt);
454
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3), opt);
456
benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3), opt);
458
benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3), opt);
460
benchmark("yolov4-tiny", ncnn::Mat(416, 416, 3), opt);
462
benchmark("nanodet_m", ncnn::Mat(320, 320, 3), opt);
464
benchmark("yolo-fastest-1.1", ncnn::Mat(320, 320, 3), opt);
466
benchmark("yolo-fastestv2", ncnn::Mat(352, 352, 3), opt);
468
benchmark("vision_transformer", ncnn::Mat(384, 384, 3), opt);
470
benchmark("FastestDet", ncnn::Mat(352, 352, 3), opt);
473
delete g_blob_vkallocator;
474
delete g_staging_vkallocator;