// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifdef _MSC_VER
#define _CRT_SECURE_NO_DEPRECATE
#endif

#include <algorithm>
#include <cmath>   // sqrt
#include <cstdio>  // fprintf
#include <cstring> // memset
#include <map>
#include <set>
#include <vector>

// ncnn public header
#include "datareader.h"
#include "layer.h"
#include "layer_type.h"
#include "net.h"

// ncnn private header
#include "modelwriter.h"
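
// A stand-in weight source: scan() matches nothing and read() zero-fills the
// destination buffer, so a network can be loaded for structural optimization
// even when no real .bin weight file is available.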
class DataReaderFromEmpty : public ncnn::DataReader
{
public:
    virtual int scan(const char* format, void* p) const
    {
        return 0;
    }
    virtual size_t read(void* buf, size_t size) const
    {
        memset(buf, 0, size);
        return size;
    }
};
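
// NetOptimize mutates the loaded graph in place. Each fuse_* pass folds a
// two-layer pattern into the first layer, re-points that layer's top blob to
// the downstream output, and retags the absorbed layer as "ncnnfused" so the
// ModelWriter base class can drop it when the optimized model is saved.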
class NetOptimize : public ModelWriter
{
public:
    NetOptimize();

public:
    int fuse_batchnorm_scale();
    int fuse_convolution_batchnorm();
    int fuse_convolution_mul();
    int fuse_convolution_add();
    int fuse_convolutiondepthwise_batchnorm();
    int fuse_convolutiondepthwise_mul();
    int fuse_convolutiondepthwise_add();
    int fuse_deconvolution_batchnorm();
    int fuse_deconvolution_mul();
    int fuse_deconvolution_add();
    int fuse_deconvolutiondepthwise_batchnorm();
    int fuse_innerproduct_batchnorm();
    int fuse_innerproduct_add();
    int fuse_innerproduct_dropout();
    int fuse_convolution_activation();
    int fuse_convolutiondepthwise_activation();
    int fuse_deconvolution_activation();
    int fuse_deconvolutiondepthwise_activation();
    int fuse_innerproduct_activation();
    int fuse_memorydata_binaryop();
    int fuse_binaryop_eltwise();

    int eliminate_dropout();
    int eliminate_pooling1x1();
    int eliminate_noop();
    int eliminate_split();
    int eliminate_orphaned_memorydata();
    int eliminate_flatten_after_global_pooling();
    int eliminate_reshape_after_global_pooling();
    int eliminate_flatten_after_innerproduct();
    int eliminate_reshape_before_binaryop();

    int replace_reduction_with_global_pooling();
    int replace_prelu_with_leaky_relu();
    int replace_convolution_with_innerproduct_after_global_pooling();
    int replace_convolution_with_innerproduct_after_innerproduct();
};

NetOptimize::NetOptimize()
    : ModelWriter()
{
}
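
// Scan pattern shared by all passes below: for each candidate layer i, walk
// forward to the first layer j that consumes i's top blob; if the walk falls
// off the end (j == layer_count), no consumer matched and nothing is fused.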
int NetOptimize::fuse_batchnorm_scale()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "BatchNorm")
            continue;

        // BatchNorm - Scale
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Scale")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse BatchNorm - Scale to BatchNorm
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[i];
        ncnn::Scale* scale = (ncnn::Scale*)layers[j];

        fprintf(stderr, "fuse_batchnorm_scale %s %s\n", batchnorm->name.c_str(), scale->name.c_str());

        {
            //             v = ((v - mean) / sqrt(var + eps) * slope + bias) * s + b
            //               =  (v - mean) / sqrt(var + eps) * (slope * s) + (bias * s + b)

            int channels = batchnorm->channels;

            float* slope = batchnorm->slope_data;
            float* bias = batchnorm->bias_data;

            for (int q = 0; q < channels; q++)
            {
                slope[q] = slope[q] * scale->scale_data[q];
                if (scale->bias_term)
                    bias[q] = bias[q] * scale->scale_data[q] + scale->bias_data[q];
                else
                    bias[q] = bias[q] * scale->scale_data[q];
            }
        }

        int top_blob_index_final = scale->tops[0];
        batchnorm->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        scale->type = "ncnnfused";
    }

    return 0;
}
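
// Folding BatchNorm into the preceding layer: with per-channel
//     b = slope / sqrt(var + eps)
//     a = bias - slope * mean / sqrt(var + eps)
// the BatchNorm output is conv_out * b + a, so scaling output channel i's
// weights by b[i] and mapping bias[i] -> bias[i] * b[i] + a[i] is exact.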
int NetOptimize::fuse_convolution_batchnorm()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution - BatchNorm to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i = 0; i < channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (convolution->bias_term == 0)
            {
                // init bias as zero
                convolution->bias_term = 1;
                convolution->bias_data = ncnn::Mat(channels);
                convolution->bias_data.fill(0.f);
            }

            const int weight_per_outch = convolution->weight_data_size / channels;

            float* weight = convolution->weight_data;
            float* bias = convolution->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}
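
// In ncnn's BinaryOp, op_type 0 is Add and 2 is Mul; with_scalar means the
// second operand is a literal constant rather than a blob. The mul/add passes
// below only fire when the second operand comes from a MemoryData layer whose
// shape broadcasts like a per-channel bias.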
int NetOptimize::fuse_convolution_mul()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution - BinaryOp to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 2 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = convolution->num_output;

        if (memorydata->w != channels || memorydata->h != 0 || memorydata->c != 0)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_convolution_mul %s %s\n", convolution->name.c_str(), binaryop->name.c_str());

        {
            const int weight_per_outch = convolution->weight_data_size / channels;

            float* weight = convolution->weight_data;
            float* bias = convolution->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= memorydata->data[i];
                }

                if (bias)
                {
                    bias[i] = bias[i] * memorydata->data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}
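
// Two MemoryData shapes count as bias-like for the add passes: a 1-D vector
// of length num_output (w == channels, h == 0, c == 0) and a per-channel 3-D
// blob of 1x1 planes (w == 1, h == 1, c == channels); reshape(channels)
// flattens either into a plain bias vector.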
int NetOptimize::fuse_convolution_add()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution - BinaryOp to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 0 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = convolution->num_output;

        bool broadcasting_type_ok = false;
        if (memorydata->w == channels && memorydata->h == 0 && memorydata->c == 0)
            broadcasting_type_ok = true;
        if (memorydata->w == 1 && memorydata->h == 1 && memorydata->c == channels)
            broadcasting_type_ok = true;

        if (!broadcasting_type_ok)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_convolution_add %s %s\n", convolution->name.c_str(), binaryop->name.c_str());

        ncnn::Mat bias_data = memorydata->data.reshape(channels);
        {
            if (convolution->bias_term == 0)
            {
                // init bias
                convolution->bias_term = 1;
                convolution->bias_data = bias_data;
            }
            else
            {
                float* bias = convolution->bias_data;
                for (int i = 0; i < channels; i++)
                {
                    bias[i] = bias[i] + bias_data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}
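
// The depthwise variants below repeat the same folding: BatchNorm and
// bias-like BinaryOps act per output channel, and each depthwise output
// channel owns a disjoint slice of weight_data, so the per-channel scale and
// shift apply unchanged.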
int NetOptimize::fuse_convolutiondepthwise_batchnorm()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "ConvolutionDepthWise")
            continue;

        // ConvolutionDepthWise - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse ConvolutionDepthWise - BatchNorm to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_convolutiondepthwise_batchnorm %s %s\n", convolutiondepthwise->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i = 0; i < channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (convolutiondepthwise->bias_term == 0)
            {
                // init bias as zero
                convolutiondepthwise->bias_term = 1;
                convolutiondepthwise->bias_data = ncnn::Mat(channels);
                convolutiondepthwise->bias_data.fill(0.f);
            }

            const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;

            float* weight = convolutiondepthwise->weight_data;
            float* bias = convolutiondepthwise->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_convolutiondepthwise_mul()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "ConvolutionDepthWise")
            continue;

        // ConvolutionDepthWise - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse ConvolutionDepthWise - BinaryOp to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 2 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = convolutiondepthwise->num_output;

        if (memorydata->w != channels || memorydata->h != 0 || memorydata->c != 0)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_convolutiondepthwise_mul %s %s\n", convolutiondepthwise->name.c_str(), binaryop->name.c_str());

        {
            const int weight_per_outch = convolutiondepthwise->weight_data_size / channels;

            float* weight = convolutiondepthwise->weight_data;
            float* bias = convolutiondepthwise->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= memorydata->data[i];
                }

                if (bias)
                {
                    bias[i] = bias[i] * memorydata->data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_convolutiondepthwise_add()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "ConvolutionDepthWise")
            continue;

        // ConvolutionDepthWise - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse ConvolutionDepthWise - BinaryOp to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 0 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = convolutiondepthwise->num_output;

        bool broadcasting_type_ok = false;
        if (memorydata->w == channels && memorydata->h == 0 && memorydata->c == 0)
            broadcasting_type_ok = true;
        if (memorydata->w == 1 && memorydata->h == 1 && memorydata->c == channels)
            broadcasting_type_ok = true;

        if (!broadcasting_type_ok)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_convolutiondepthwise_add %s %s\n", convolutiondepthwise->name.c_str(), binaryop->name.c_str());

        ncnn::Mat bias_data = memorydata->data.reshape(channels);
        {
            if (convolutiondepthwise->bias_term == 0)
            {
                // init bias
                convolutiondepthwise->bias_term = 1;
                convolutiondepthwise->bias_data = bias_data;
            }
            else
            {
                float* bias = convolutiondepthwise->bias_data;
                for (int i = 0; i < channels; i++)
                {
                    bias[i] = bias[i] + bias_data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_deconvolution_batchnorm()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Deconvolution")
            continue;

        // Deconvolution - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Deconvolution - BatchNorm to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_deconvolution_batchnorm %s %s\n", deconvolution->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i = 0; i < channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (deconvolution->bias_term == 0)
            {
                // init bias as zero
                deconvolution->bias_term = 1;
                deconvolution->bias_data = ncnn::Mat(channels);
                deconvolution->bias_data.fill(0.f);
            }

            const int weight_per_outch = deconvolution->weight_data_size / channels;

            float* weight = deconvolution->weight_data;
            float* bias = deconvolution->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_deconvolution_mul()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Deconvolution")
            continue;

        // Deconvolution - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Deconvolution - BinaryOp to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 2 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = deconvolution->num_output;

        if (memorydata->w != channels || memorydata->h != 0 || memorydata->c != 0)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_deconvolution_mul %s %s\n", deconvolution->name.c_str(), binaryop->name.c_str());

        {
            const int weight_per_outch = deconvolution->weight_data_size / channels;

            float* weight = deconvolution->weight_data;
            float* bias = deconvolution->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= memorydata->data[i];
                }

                if (bias)
                {
                    bias[i] = bias[i] * memorydata->data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_deconvolution_add()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Deconvolution")
            continue;

        // Deconvolution - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Deconvolution - BinaryOp to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 0 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = deconvolution->num_output;

        bool broadcasting_type_ok = false;
        if (memorydata->w == channels && memorydata->h == 0 && memorydata->c == 0)
            broadcasting_type_ok = true;
        if (memorydata->w == 1 && memorydata->h == 1 && memorydata->c == channels)
            broadcasting_type_ok = true;

        if (!broadcasting_type_ok)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_deconvolution_add %s %s\n", deconvolution->name.c_str(), binaryop->name.c_str());

        ncnn::Mat bias_data = memorydata->data.reshape(channels);
        {
            if (deconvolution->bias_term == 0)
            {
                // init bias
                deconvolution->bias_term = 1;
                deconvolution->bias_data = bias_data;
            }
            else
            {
                float* bias = deconvolution->bias_data;
                for (int i = 0; i < channels; i++)
                {
                    bias[i] = bias[i] + bias_data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}
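
// Deconvolution folding is identical as well: BatchNorm acts on the layer's
// output channels, so the direction of the convolution does not matter.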
int NetOptimize::fuse_deconvolutiondepthwise_batchnorm()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "DeconvolutionDepthWise")
            continue;

        // DeconvolutionDepthWise - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse DeconvolutionDepthWise - BatchNorm to DeconvolutionDepthWise
        ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_deconvolutiondepthwise_batchnorm %s %s\n", deconvolutiondepthwise->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i = 0; i < channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (deconvolutiondepthwise->bias_term == 0)
            {
                // init bias as zero
                deconvolutiondepthwise->bias_term = 1;
                deconvolutiondepthwise->bias_data = ncnn::Mat(channels);
                deconvolutiondepthwise->bias_data.fill(0.f);
            }

            const int weight_per_outch = deconvolutiondepthwise->weight_data_size / channels;

            float* weight = deconvolutiondepthwise->weight_data;
            float* bias = deconvolutiondepthwise->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        deconvolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_innerproduct_batchnorm()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - BatchNorm
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BatchNorm")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - BatchNorm to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

        fprintf(stderr, "fuse_innerproduct_batchnorm %s %s\n", innerproduct->name.c_str(), batchnorm->name.c_str());

        {
            int channels = batchnorm->channels;
            float eps = batchnorm->eps;

            // a = bias - slope * mean / sqrt(var + eps)
            // b = slope / sqrt(var + eps)
            // value = value * b + a

            std::vector<float> a(channels);
            std::vector<float> b(channels);
            for (int i = 0; i < channels; i++)
            {
                float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
                a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
                b[i] = batchnorm->slope_data[i] / sqrt_var;
            }

            if (innerproduct->bias_term == 0)
            {
                // init bias as zero
                innerproduct->bias_term = 1;
                innerproduct->bias_data = ncnn::Mat(channels);
                innerproduct->bias_data.fill(0.f);
            }

            const int weight_per_outch = innerproduct->weight_data_size / channels;

            float* weight = innerproduct->weight_data;
            float* bias = innerproduct->bias_data;
            for (int i = 0; i < channels; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= b[i];
                }

                bias[i] = bias[i] * b[i] + a[i];
            }
        }

        int top_blob_index_final = batchnorm->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        batchnorm->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_innerproduct_add()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - BinaryOp to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (binaryop->op_type != 0 || binaryop->with_scalar)
            continue;

        // MemoryData - ..... - BinaryOp
        size_t k = 0;
        for (; k < j; k++)
        {
            if (layers[k]->type != "MemoryData")
                continue;

            if (layers[k]->tops[0] == binaryop->bottoms[1])
                break;
        }

        if (k == j)
            continue;

        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[k];

        int channels = innerproduct->num_output;

        bool broadcasting_type_ok = false;
        if (memorydata->w == channels && memorydata->h == 0 && memorydata->c == 0)
            broadcasting_type_ok = true;
        if (memorydata->w == 1 && memorydata->h == 1 && memorydata->c == channels)
            broadcasting_type_ok = true;

        if (!broadcasting_type_ok)
        {
            // not bias-like broadcasting type
            continue;
        }

        fprintf(stderr, "fuse_innerproduct_add %s %s\n", innerproduct->name.c_str(), binaryop->name.c_str());

        ncnn::Mat bias_data = memorydata->data.reshape(channels);
        {
            if (innerproduct->bias_term == 0)
            {
                // init bias
                innerproduct->bias_term = 1;
                innerproduct->bias_data = bias_data;
            }
            else
            {
                float* bias = innerproduct->bias_data;
                for (int i = 0; i < channels; i++)
                {
                    bias[i] = bias[i] + bias_data[i];
                }
            }
        }

        int top_blob_index_final = binaryop->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        binaryop->type = "ncnnfused";
    }

    return 0;
}
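
// Dropout at inference time is just a multiplication by its scale parameter;
// a scale of 1 makes it a pure pass-through, so only the blob rewiring below
// is needed in that case.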
int NetOptimize::fuse_innerproduct_dropout()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Dropout
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Dropout")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - Dropout to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Dropout* dropout = (ncnn::Dropout*)layers[j];

        fprintf(stderr, "fuse_innerproduct_dropout %s %s\n", innerproduct->name.c_str(), dropout->name.c_str());

        float scale = dropout->scale;
        if (scale != 1.f)
        {
            const int num_output = innerproduct->num_output;
            const int weight_per_outch = innerproduct->weight_data_size / num_output;

            float* weight = innerproduct->weight_data;
            for (int i = 0; i < num_output; i++)
            {
                float* conv_weight_outch = weight + weight_per_outch * i;
                for (int j = 0; j < weight_per_outch; j++)
                {
                    conv_weight_outch[j] *= scale;
                }
            }

            if (innerproduct->bias_term)
            {
                float* bias = innerproduct->bias_data;
                for (int i = 0; i < num_output; i++)
                {
                    bias[i] *= scale;
                }
            }
        }

        int top_blob_index_final = dropout->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        dropout->type = "ncnnfused";
    }

    return 0;
}
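
// Activation fusion maps the follower layer onto the activation_type field
// consumed inside the conv/fc kernels: 1 = ReLU, 2 = leaky ReLU (slope),
// 3 = Clip (min, max), 4 = Sigmoid, 5 = Mish, 6 = HardSwish (alpha, beta).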
int NetOptimize::fuse_convolution_activation()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Convolution")
            continue;

        // Convolution - Activation
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid" && layers[j]->type != "Mish" && layers[j]->type != "HardSwish")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution - Activation to Convolution
        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolution_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                convolution->activation_type = 1;
            }
            else
            {
                convolution->activation_type = 2;
                convolution->activation_params = ncnn::Mat(1);
                convolution->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            convolution->activation_type = 3;
            convolution->activation_params = ncnn::Mat(2);
            convolution->activation_params[0] = clip->min;
            convolution->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            convolution->activation_type = 4;
        }
        else if (activation->type == "Mish")
        {
            convolution->activation_type = 5;
        }
        else if (activation->type == "HardSwish")
        {
            ncnn::HardSwish* hardswish = (ncnn::HardSwish*)activation;

            convolution->activation_type = 6;
            convolution->activation_params = ncnn::Mat(2);
            convolution->activation_params[0] = hardswish->alpha;
            convolution->activation_params[1] = hardswish->beta;
        }

        int top_blob_index_final = activation->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Convolution1D")
            continue;

        // Convolution1D - Activation
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid" && layers[j]->type != "Mish")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Convolution1D - Activation to Convolution1D
        ncnn::Convolution1D* convolution = (ncnn::Convolution1D*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolution1d_activation %s %s\n", convolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                convolution->activation_type = 1;
            }
            else
            {
                convolution->activation_type = 2;
                convolution->activation_params = ncnn::Mat(1);
                convolution->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            convolution->activation_type = 3;
            convolution->activation_params = ncnn::Mat(2);
            convolution->activation_params[0] = clip->min;
            convolution->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            convolution->activation_type = 4;
        }
        else if (activation->type == "Mish")
        {
            convolution->activation_type = 5;
        }

        int top_blob_index_final = activation->tops[0];
        convolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_convolutiondepthwise_activation()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "ConvolutionDepthWise")
            continue;

        // ConvolutionDepthWise - Activation
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid" && layers[j]->type != "Mish" && layers[j]->type != "HardSwish")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse ConvolutionDepthWise - Activation to ConvolutionDepthWise
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_convolutiondepthwise_activation %s %s\n", convolutiondepthwise->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                convolutiondepthwise->activation_type = 1;
            }
            else
            {
                convolutiondepthwise->activation_type = 2;
                convolutiondepthwise->activation_params = ncnn::Mat(1);
                convolutiondepthwise->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            convolutiondepthwise->activation_type = 3;
            convolutiondepthwise->activation_params = ncnn::Mat(2);
            convolutiondepthwise->activation_params[0] = clip->min;
            convolutiondepthwise->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            convolutiondepthwise->activation_type = 4;
        }
        else if (activation->type == "Mish")
        {
            convolutiondepthwise->activation_type = 5;
        }
        else if (activation->type == "HardSwish")
        {
            ncnn::HardSwish* hardswish = (ncnn::HardSwish*)activation;

            convolutiondepthwise->activation_type = 6;
            convolutiondepthwise->activation_params = ncnn::Mat(2);
            convolutiondepthwise->activation_params[0] = hardswish->alpha;
            convolutiondepthwise->activation_params[1] = hardswish->beta;
        }

        int top_blob_index_final = activation->tops[0];
        convolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_deconvolution_activation()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Deconvolution")
            continue;

        // Deconvolution - Activation
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse Deconvolution - Activation to Deconvolution
        ncnn::Deconvolution* deconvolution = (ncnn::Deconvolution*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_deconvolution_activation %s %s\n", deconvolution->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                deconvolution->activation_type = 1;
            }
            else
            {
                deconvolution->activation_type = 2;
                deconvolution->activation_params = ncnn::Mat(1);
                deconvolution->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            deconvolution->activation_type = 3;
            deconvolution->activation_params = ncnn::Mat(2);
            deconvolution->activation_params[0] = clip->min;
            deconvolution->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            deconvolution->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        deconvolution->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

int NetOptimize::fuse_deconvolutiondepthwise_activation()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "DeconvolutionDepthWise")
            continue;

        // DeconvolutionDepthWise - Activation
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse DeconvolutionDepthWise - Activation to DeconvolutionDepthWise
        ncnn::DeconvolutionDepthWise* deconvolutiondepthwise = (ncnn::DeconvolutionDepthWise*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_deconvolutiondepthwise_activation %s %s\n", deconvolutiondepthwise->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                deconvolutiondepthwise->activation_type = 1;
            }
            else
            {
                deconvolutiondepthwise->activation_type = 2;
                deconvolutiondepthwise->activation_params = ncnn::Mat(1);
                deconvolutiondepthwise->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            deconvolutiondepthwise->activation_type = 3;
            deconvolutiondepthwise->activation_params = ncnn::Mat(2);
            deconvolutiondepthwise->activation_params[0] = clip->min;
            deconvolutiondepthwise->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            deconvolutiondepthwise->activation_type = 4;
        }

        int top_blob_index_final = activation->tops[0];
        deconvolutiondepthwise->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

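// Same folding pattern for InnerProduct; this variant additionally accepts
// Mish and HardSwish as fusable activations.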
int NetOptimize::fuse_innerproduct_activation()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Activation
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "ReLU" && layers[j]->type != "Clip" && layers[j]->type != "Sigmoid" && layers[j]->type != "Mish" && layers[j]->type != "HardSwish")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse InnerProduct - Activation to InnerProduct
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Layer* activation = layers[j];

        fprintf(stderr, "fuse_innerproduct_activation %s %s\n", innerproduct->name.c_str(), activation->name.c_str());

        if (activation->type == "ReLU")
        {
            ncnn::ReLU* relu = (ncnn::ReLU*)activation;

            if (relu->slope == 0.f)
            {
                innerproduct->activation_type = 1;
            }
            else
            {
                innerproduct->activation_type = 2;
                innerproduct->activation_params = ncnn::Mat(1);
                innerproduct->activation_params[0] = relu->slope;
            }
        }
        else if (activation->type == "Clip")
        {
            ncnn::Clip* clip = (ncnn::Clip*)activation;

            innerproduct->activation_type = 3;
            innerproduct->activation_params = ncnn::Mat(2);
            innerproduct->activation_params[0] = clip->min;
            innerproduct->activation_params[1] = clip->max;
        }
        else if (activation->type == "Sigmoid")
        {
            innerproduct->activation_type = 4;
        }
        else if (activation->type == "Mish")
        {
            innerproduct->activation_type = 5;
        }
        else if (activation->type == "HardSwish")
        {
            ncnn::HardSwish* hardswish = (ncnn::HardSwish*)activation;

            innerproduct->activation_type = 6;
            innerproduct->activation_params = ncnn::Mat(2);
            innerproduct->activation_params[0] = hardswish->alpha;
            innerproduct->activation_params[1] = hardswish->beta;
        }

        int top_blob_index_final = activation->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        activation->type = "ncnnfused";
    }

    return 0;
}

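// When a MemoryData holds a single scalar (w == 1, h == 0, c == 0) feeding one
// input of a two-input BinaryOp, rewrite the BinaryOp into its scalar form
// (with_scalar = 1, b = value) and drop the MemoryData input. If the scalar is
// the left operand of the non-commutative SUB or DIV, the op is flipped to
// RSUB or RDIV so the tensor can remain the single input. A second sweep below
// handles the same pattern with a Split between MemoryData and BinaryOp.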
int NetOptimize::fuse_memorydata_binaryop()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "MemoryData")
            continue;

        // MemoryData - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index || layers[j]->bottoms[1] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        // fuse MemoryData - BinaryOp to BinaryOp
        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[i];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        if (memorydata->w != 1 || memorydata->h != 0 || memorydata->c != 0)
        {
            // not a scalar
            continue;
        }

        int memorydata_index = 1;

        if (binaryop->bottoms[0] == top_blob_index)
        {
            int op_type = binaryop->op_type;

            if (op_type == ncnn::BinaryOp::Operation_ADD
                    || op_type == ncnn::BinaryOp::Operation_MUL
                    || op_type == ncnn::BinaryOp::Operation_MAX
                    || op_type == ncnn::BinaryOp::Operation_MIN)
            {
                memorydata_index = 0;
            }
            else if (op_type == ncnn::BinaryOp::Operation_SUB)
            {
                binaryop->op_type = ncnn::BinaryOp::Operation_RSUB;
                memorydata_index = 0;
            }
            else if (op_type == ncnn::BinaryOp::Operation_DIV)
            {
                binaryop->op_type = ncnn::BinaryOp::Operation_RDIV;
                memorydata_index = 0;
            }
            else
            {
                // non interchangeable binaryop
                continue;
            }
        }

        float scalar = memorydata->data[0];

        binaryop->with_scalar = 1;
        binaryop->b = scalar;

        fprintf(stderr, "fuse_memorydata_binaryop %s %s\n", memorydata->name.c_str(), binaryop->name.c_str());

        binaryop->bottoms.erase(binaryop->bottoms.begin() + memorydata_index);
        memorydata->type = "ncnnfused";
    }

    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "MemoryData")
            continue;

        // MemoryData - Split - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j0 = i + 1;
        for (; j0 < layer_count; j0++)
        {
            if (layers[j0]->type != "Split")
                continue;

            if (layers[j0]->bottoms.size() != 1)
                continue;

            if (layers[j0]->bottoms[0] == top_blob_index)
                break;
        }

        if (j0 == layer_count)
            continue;

        int split_top_blob_index = -1;

        size_t j1 = j0 + 1;
        for (; j1 < layer_count; j1++)
        {
            if (layers[j1]->type != "BinaryOp")
                continue;

            if (layers[j1]->bottoms.size() != 2)
                continue;

            for (int k = 0; k < (int)layers[j0]->tops.size(); k++)
            {
                if (layers[j1]->bottoms[0] == layers[j0]->tops[k] || layers[j1]->bottoms[1] == layers[j0]->tops[k])
                {
                    split_top_blob_index = k;
                    break;
                }
            }

            if (split_top_blob_index != -1)
                break;
        }

        if (j1 == layer_count)
            continue;

        // fuse MemoryData - Split - BinaryOp to BinaryOp
        ncnn::MemoryData* memorydata = (ncnn::MemoryData*)layers[i];
        ncnn::Split* split = (ncnn::Split*)layers[j0];
        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j1];

        if (memorydata->w != 1 || memorydata->h != 0 || memorydata->c != 0)
        {
            // not a scalar
            continue;
        }

        int memorydata_index = 1;

        if (binaryop->bottoms[0] == split->tops[split_top_blob_index])
        {
            int op_type = binaryop->op_type;

            if (op_type == ncnn::BinaryOp::Operation_ADD
                    || op_type == ncnn::BinaryOp::Operation_MUL
                    || op_type == ncnn::BinaryOp::Operation_MAX
                    || op_type == ncnn::BinaryOp::Operation_MIN)
            {
                memorydata_index = 0;
            }
            else if (op_type == ncnn::BinaryOp::Operation_SUB)
            {
                binaryop->op_type = ncnn::BinaryOp::Operation_RSUB;
                memorydata_index = 0;
            }
            else if (op_type == ncnn::BinaryOp::Operation_DIV)
            {
                binaryop->op_type = ncnn::BinaryOp::Operation_RDIV;
                memorydata_index = 0;
            }
            else
            {
                // non interchangeable binaryop
                continue;
            }
        }

        float scalar = memorydata->data[0];

        binaryop->with_scalar = 1;
        binaryop->b = scalar;

        fprintf(stderr, "fuse_memorydata_binaryop %s %s\n", memorydata->name.c_str(), binaryop->name.c_str());

        binaryop->bottoms.erase(binaryop->bottoms.begin() + memorydata_index);
        split->tops.erase(split->tops.begin() + split_top_blob_index);
        if (split->tops.empty())
        {
            split->type = "ncnnfused";
            memorydata->type = "ncnnfused";
        }

        i--;
    }

    return 0;
}

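// Rewrite x*c0 + y*c1, expressed as two scalar-MUL BinaryOps feeding an ADD,
// into a single Eltwise SUM with coefficients (c0, c1). When only one input
// comes from a scalar MUL, its coefficient is taken and the other defaults to
// 1.f. The ADD layer object is swapped out of layers[] and deleted.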
int NetOptimize::fuse_binaryop_eltwise()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "BinaryOp")
            continue;

        if (layers[i]->bottoms.size() != 2)
            continue;

        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[i];

        if (binaryop->op_type != ncnn::BinaryOp::Operation_ADD)
            continue;

        if (binaryop->with_scalar)
            continue;

        // BinaryOp - BinaryOp - BinaryOp
        int bottom_blob_index_0 = binaryop->bottoms[0];
        int bottom_blob_index_1 = binaryop->bottoms[1];

        size_t j0 = 0;
        for (; j0 < i; j0++)
        {
            if (layers[j0]->type != "BinaryOp")
                continue;

            if (layers[j0]->bottoms.size() != 1)
                continue;

            if (((ncnn::BinaryOp*)layers[j0])->op_type != ncnn::BinaryOp::Operation_MUL)
                continue;

            if (layers[j0]->tops[0] == bottom_blob_index_0)
                break;
        }

        size_t j1 = 0;
        for (; j1 < i; j1++)
        {
            if (layers[j1]->type != "BinaryOp")
                continue;

            if (layers[j1]->bottoms.size() != 1)
                continue;

            if (((ncnn::BinaryOp*)layers[j1])->op_type != ncnn::BinaryOp::Operation_MUL)
                continue;

            if (layers[j1]->tops[0] == bottom_blob_index_1)
                break;
        }

        if (j0 == i && j1 == i)
            continue;

        ncnn::BinaryOp* binaryop0 = (ncnn::BinaryOp*)layers[j0];
        ncnn::BinaryOp* binaryop1 = (ncnn::BinaryOp*)layers[j1];

        fprintf(stderr, "fuse_binaryop_eltwise %s %s %s\n", binaryop0->name.c_str(), binaryop1->name.c_str(), binaryop->name.c_str());

        ncnn::Eltwise* eltwise = (ncnn::Eltwise*)ncnn::create_layer_cpu("Eltwise");

        eltwise->type = "Eltwise";
        eltwise->name = binaryop->name;
        eltwise->bottoms = binaryop->bottoms;
        eltwise->tops = binaryop->tops;

        ncnn::ParamDict pd;
        eltwise->load_param(pd);

        eltwise->op_type = ncnn::Eltwise::Operation_SUM;

        eltwise->coeffs = ncnn::Mat(2);

        if (j0 != i && j1 != i)
        {
            // fuse BinaryOp - BinaryOp - BinaryOp to Eltwise
            eltwise->coeffs[0] = binaryop0->b;
            eltwise->coeffs[1] = binaryop1->b;

            eltwise->bottoms[0] = binaryop0->bottoms[0];
            eltwise->bottoms[1] = binaryop1->bottoms[0];

            binaryop0->type = "ncnnfused";
            binaryop1->type = "ncnnfused";
        }
        if (j0 != i && j1 == i)
        {
            // fuse BinaryOp - X - BinaryOp to Eltwise
            eltwise->coeffs[0] = binaryop0->b;
            eltwise->coeffs[1] = 1.f;

            eltwise->bottoms[0] = binaryop0->bottoms[0];

            binaryop0->type = "ncnnfused";
        }
        if (j0 == i && j1 != i)
        {
            // fuse X - BinaryOp - BinaryOp to Eltwise
            eltwise->coeffs[0] = 1.f;
            eltwise->coeffs[1] = binaryop1->b;

            eltwise->bottoms[1] = binaryop1->bottoms[0];

            binaryop1->type = "ncnnfused";
        }

        layers[i] = eltwise;
        delete binaryop;
    }

    return 0;
}

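// Dropout with scale == 1.f is an identity at inference time, so the producer
// of its input is rewired to emit the dropout's output blob directly.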
int NetOptimize::eliminate_dropout()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Dropout")
            continue;

        ncnn::Dropout* dropout = (ncnn::Dropout*)layers[i];
        if (dropout->scale != 1.f)
            continue;

        // Any - Dropout
        int bottom_blob_index = layers[i]->bottoms[0];

        int j = i - 1;
        for (; j >= 0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            if (layers[j]->tops.size() != 1)
                continue;

            if (layers[j]->tops[0] == bottom_blob_index)
                break;
        }

        if (j == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_dropout %s %s\n", any->name.c_str(), dropout->name.c_str());

        int top_blob_index_final = dropout->tops[0];
        any->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        dropout->type = "ncnnfused";
    }

    return 0;
}

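// A non-global Pooling with 1x1 kernel, stride 1 and no padding passes data
// through unchanged and can be short-circuited the same way.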
int NetOptimize::eliminate_pooling1x1()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->pad_left != 0 || pooling->pad_right != 0 || pooling->pad_top != 0 || pooling->pad_bottom != 0)
            continue;

        if (pooling->kernel_w != 1 || pooling->kernel_h != 1 || pooling->stride_w != 1 || pooling->stride_h != 1)
            continue;

        if (pooling->global_pooling != 0)
            continue;

        // Any - Pooling
        int bottom_blob_index = layers[i]->bottoms[0];

        int top_i = -1;
        int j = i - 1;
        for (; j >= 0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            for (size_t k = 0; k < layers[j]->tops.size(); k++)
            {
                if (layers[j]->tops[k] == bottom_blob_index)
                {
                    top_i = k;
                    break;
                }
            }

            if (top_i != -1)
                break;
        }

        if (j == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_pooling1x1 %s %s\n", any->name.c_str(), pooling->name.c_str());

        int top_blob_index_final = pooling->tops[0];
        any->tops[top_i] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        pooling->type = "ncnnfused";
    }

    return 0;
}

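// Drop Noop layers. A Noop without inputs merely names its output blobs, which
// are left producer-less; otherwise the upstream layer is rewired onto the
// Noop's output blob.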
int NetOptimize::eliminate_noop()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Noop")
            continue;

        ncnn::Layer* noop = layers[i];

        if (noop->bottoms.empty())
        {
            // Noop
            fprintf(stderr, "eliminate_noop %s\n", noop->name.c_str());

            size_t top_blob_count = noop->tops.size();
            for (size_t j = 0; j < top_blob_count; j++)
            {
                int top_blob_index_final = noop->tops[j];
                blobs[top_blob_index_final].producer = -1;
            }
            noop->type = "ncnnfused";

            continue;
        }

        // Any - Noop
        int bottom_blob_index = noop->bottoms[0];

        int j = i - 1;
        int any_k = -1;
        for (; j >= 0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            bool link_noop = false;
            size_t top_blob_count = layers[j]->tops.size();
            for (size_t k = 0; k < top_blob_count; k++)
            {
                if (layers[j]->tops[k] == bottom_blob_index)
                {
                    link_noop = true;
                    any_k = k;
                    break;
                }
            }

            if (link_noop)
                break;
        }

        if (j == -1 || any_k == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_noop %s %s\n", any->name.c_str(), noop->name.c_str());

        int top_blob_index_final = noop->tops[0];
        any->tops[any_k] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;

        noop->type = "ncnnfused";
    }

    return 0;
}

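// A Split is redundant once only one of its outputs still has a consumer; the
// upstream layer is rewired to feed that single consumed blob directly.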
int NetOptimize::eliminate_split()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Split")
            continue;

        ncnn::Layer* split = layers[i];

        int real_split_output_count = 0;
        int real_split_top_blob_index = -1;
        size_t top_blob_count = split->tops.size();
        for (size_t j = 0; j < top_blob_count; j++)
        {
            int top_blob_index_final = split->tops[j];
            if (blobs[top_blob_index_final].consumer != -1)
            {
                real_split_output_count += 1;
                real_split_top_blob_index = j;
            }
        }

        // rewire only when exactly one output is actually consumed; a fully
        // orphaned Split has no blob to forward
        if (real_split_output_count != 1)
            continue;

        // Any - Split
        int bottom_blob_index = split->bottoms[0];

        int top_i = -1;
        int j = i - 1;
        for (; j >= 0; j--)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            for (size_t k = 0; k < layers[j]->tops.size(); k++)
            {
                if (layers[j]->tops[k] == bottom_blob_index)
                {
                    top_i = k;
                    break;
                }
            }

            if (top_i != -1)
                break;
        }

        if (j == -1)
            continue;

        ncnn::Layer* any = layers[j];

        fprintf(stderr, "eliminate_split %s %s\n", any->name.c_str(), split->name.c_str());

        int top_blob_index_final = split->tops[real_split_top_blob_index];
        any->tops[top_i] = top_blob_index_final;
        blobs[top_blob_index_final].producer = j;
        split->type = "ncnnfused";
    }

    return 0;
}

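// Drop MemoryData layers whose output blob no remaining layer consumes,
// typically leftovers from the fusions above.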
int NetOptimize::eliminate_orphaned_memorydata()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "MemoryData")
            continue;

        // MemoryData - X
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type == "ncnnfused")
                continue;

            bool orphaned = true;
            for (size_t k = 0; k < layers[j]->bottoms.size(); k++)
            {
                if (layers[j]->bottoms[k] == top_blob_index)
                {
                    orphaned = false;
                    break;
                }
            }

            if (!orphaned)
                break;
        }

        if (j < layer_count)
            continue;

        // assert orphaned == true
        fprintf(stderr, "eliminate_orphaned_memorydata %s\n", layers[i]->name.c_str());

        layers[i]->type = "ncnnfused";
    }

    return 0;
}

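// The output of global pooling is already 1x1xC, so a Reshape that only sets w
// (h and c left at -233, ncnn's "unset" marker, and no permute) does not
// change the data and can be folded away.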
int NetOptimize::eliminate_reshape_after_global_pooling()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0)
            continue;

        // Pooling - Reshape
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Reshape")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Reshape* reshape = (ncnn::Reshape*)layers[j];
        if (reshape->h != -233 || reshape->c != -233 || reshape->permute != 0)
            continue;

        fprintf(stderr, "eliminate_reshape_after_global_pooling %s %s\n", pooling->name.c_str(), reshape->name.c_str());

        int top_blob_index_final = reshape->tops[0];
        pooling->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        reshape->type = "ncnnfused";
    }

    return 0;
}

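// Flattening the 1x1xC output of global pooling is likewise a no-op in effect.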
int NetOptimize::eliminate_flatten_after_global_pooling()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0)
            continue;

        // Pooling - Flatten
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Flatten")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];

        fprintf(stderr, "eliminate_flatten_after_global_pooling %s %s\n", pooling->name.c_str(), flatten->name.c_str());

        int top_blob_index_final = flatten->tops[0];
        pooling->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        flatten->type = "ncnnfused";
    }

    return 0;
}

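// InnerProduct already produces a flat vector, so a following Flatten can be
// removed unconditionally.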
int NetOptimize::eliminate_flatten_after_innerproduct()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "InnerProduct")
            continue;

        // InnerProduct - Flatten
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Flatten")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
        ncnn::Flatten* flatten = (ncnn::Flatten*)layers[j];

        fprintf(stderr, "eliminate_flatten_after_innerproduct %s %s\n", innerproduct->name.c_str(), flatten->name.c_str());

        int top_blob_index_final = flatten->tops[0];
        innerproduct->tops[0] = top_blob_index_final;
        blobs[top_blob_index_final].producer = i;
        flatten->type = "ncnnfused";
    }

    return 0;
}

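// A Reshape to 1x1xC in front of a two-input BinaryOp only massages the shape
// for broadcasting; BinaryOp broadcasting is assumed to handle the original
// shape, so the Reshape is bypassed and the blob's consumer index updated.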
int NetOptimize::eliminate_reshape_before_binaryop()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Reshape")
            continue;

        ncnn::Reshape* reshape = (ncnn::Reshape*)layers[i];
        if (reshape->w != 1 || reshape->h != 1 || reshape->permute != 0)
            continue;

        // Reshape - BinaryOp
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "BinaryOp")
                continue;

            if (layers[j]->bottoms.size() != 2)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index || layers[j]->bottoms[1] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::BinaryOp* binaryop = (ncnn::BinaryOp*)layers[j];

        fprintf(stderr, "eliminate_reshape_before_binaryop %s %s\n", reshape->name.c_str(), binaryop->name.c_str());

        int bottom_blob_index_final = reshape->bottoms[0];
        if (layers[j]->bottoms[0] == top_blob_index)
            binaryop->bottoms[0] = bottom_blob_index_final;
        if (layers[j]->bottoms[1] == top_blob_index)
            binaryop->bottoms[1] = bottom_blob_index_final;
        blobs[bottom_blob_index_final].consumer = j;
        reshape->type = "ncnnfused";
    }

    return 0;
}

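// Two chained mean Reductions (operation 3) over the spatial axes compute the
// same result as one global average pooling, so the pair is collapsed into a
// Pooling layer with pooling_type = 1 (average) and global_pooling = 1.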
int NetOptimize::replace_reduction_with_global_pooling()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Reduction")
            continue;

        ncnn::Reduction* reduction1 = (ncnn::Reduction*)layers[i];
        if (reduction1->operation != 3 || reduction1->reduce_all != 0 || reduction1->coeff != 1.f)
            continue;

        if (reduction1->axes.w != 1)
            continue;

        const int* axes_ptr = reduction1->axes;
        if (axes_ptr[0] != 2 && axes_ptr[0] != 3)
            continue;

        // Reduction(2/3) - Reduction(2)
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Reduction")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Reduction* reduction2 = (ncnn::Reduction*)layers[j];
        if (reduction2->operation != 3 || reduction2->reduce_all != 0 || reduction2->coeff != 1.f)
            continue;

        if (reduction2->axes.w != 1)
            continue;

        const int* axes2_ptr = reduction2->axes;
        if (axes2_ptr[0] != 2)
            continue;

        fprintf(stderr, "replace_reduction_with_global_pooling %s %s\n", reduction1->name.c_str(), reduction2->name.c_str());

        ncnn::Pooling* pooling = (ncnn::Pooling*)ncnn::create_layer_cpu("Pooling");

        pooling->type = "Pooling";
        pooling->name = reduction2->name;
        pooling->bottoms = reduction2->bottoms;
        pooling->tops = reduction2->tops;

        ncnn::ParamDict pd;
        pooling->load_param(pd);

        pooling->pooling_type = 1;
        pooling->global_pooling = 1;

        layers[j] = pooling;
        delete reduction2;

        int bottom_blob_index_final = reduction1->bottoms[0];
        pooling->bottoms[0] = bottom_blob_index_final;
        blobs[bottom_blob_index_final].consumer = j;
        reduction1->type = "ncnnfused";
    }

    return 0;
}

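// PReLU with a single shared slope is exactly leaky ReLU, which later passes
// can fuse into the preceding convolution.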
int NetOptimize::replace_prelu_with_leaky_relu()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "PReLU")
            continue;

        ncnn::PReLU* prelu = (ncnn::PReLU*)layers[i];
        if (prelu->num_slope != 1)
            continue;

        fprintf(stderr, "replace_prelu_with_leaky_relu %s\n", prelu->name.c_str());

        ncnn::ReLU* relu = (ncnn::ReLU*)ncnn::create_layer_cpu("ReLU");

        relu->type = "ReLU";
        relu->name = prelu->name;
        relu->bottoms = prelu->bottoms;
        relu->tops = prelu->tops;

        ncnn::ParamDict pd;
        relu->load_param(pd);

        relu->slope = prelu->slope_data[0];

        layers[i] = relu;
        delete prelu;
    }

    return 0;
}

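// A Convolution applied to the 1x1 spatial output of global pooling degenerates
// to a fully connected layer, so it is rebuilt as InnerProduct, carrying over
// weights, bias, int8 scales and any already-fused activation.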
int NetOptimize::replace_convolution_with_innerproduct_after_global_pooling()
{
    const size_t layer_count = layers.size();
    for (size_t i = 0; i < layer_count; i++)
    {
        if (layers[i]->type != "Pooling")
            continue;

        ncnn::Pooling* pooling = (ncnn::Pooling*)layers[i];
        if (pooling->global_pooling == 0)
            continue;

        // Pooling - Convolution
        int top_blob_index = layers[i]->tops[0];

        size_t j = i + 1;
        for (; j < layer_count; j++)
        {
            if (layers[j]->type != "Convolution")
                continue;

            if (layers[j]->bottoms.size() != 1)
                continue;

            if (layers[j]->bottoms[0] == top_blob_index)
                break;
        }

        if (j == layer_count)
            continue;

        ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];

        fprintf(stderr, "replace_convolution_with_innerproduct_after_global_pooling %s %s\n", pooling->name.c_str(), convolution->name.c_str());

        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)ncnn::create_layer_cpu("InnerProduct");

        innerproduct->type = "InnerProduct";
        innerproduct->name = convolution->name;
        innerproduct->bottoms = convolution->bottoms;
        innerproduct->tops = convolution->tops;

        ncnn::ParamDict pd;
        innerproduct->load_param(pd);

        innerproduct->num_output = convolution->num_output;
        innerproduct->bias_term = convolution->bias_term;
        innerproduct->weight_data_size = convolution->weight_data_size;
        innerproduct->int8_scale_term = convolution->int8_scale_term;

        innerproduct->weight_data = convolution->weight_data;
        innerproduct->bias_data = convolution->bias_data;
#if NCNN_INT8
        innerproduct->weight_data_int8_scales = convolution->weight_data_int8_scales;
        innerproduct->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
#endif

        innerproduct->activation_type = convolution->activation_type;
        innerproduct->activation_params = convolution->activation_params;

        layers[j] = innerproduct;
        delete convolution;
    }

    return 0;
}

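// The same degenerate-convolution replacement behind an InnerProduct. The
// outer for (;;) loop rescans until no replacement happens, since each rewrite
// can expose another Convolution that is now fed by an InnerProduct.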
int NetOptimize::replace_convolution_with_innerproduct_after_innerproduct()
{
    const size_t layer_count = layers.size();
    for (;;)
    {
        bool replaced = false;

        for (size_t i = 0; i < layer_count; i++)
        {
            if (layers[i]->type != "InnerProduct")
                continue;

            // InnerProduct - Convolution
            int top_blob_index = layers[i]->tops[0];

            size_t j = i + 1;
            for (; j < layer_count; j++)
            {
                if (layers[j]->type != "Convolution")
                    continue;

                if (layers[j]->bottoms.size() != 1)
                    continue;

                if (layers[j]->bottoms[0] == top_blob_index)
                    break;
            }

            if (j == layer_count)
                continue;

            ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layers[i];
            ncnn::Convolution* convolution = (ncnn::Convolution*)layers[j];

            fprintf(stderr, "replace_convolution_with_innerproduct_after_innerproduct %s %s\n", innerproduct->name.c_str(), convolution->name.c_str());

            ncnn::InnerProduct* innerproduct2 = (ncnn::InnerProduct*)ncnn::create_layer_cpu("InnerProduct");

            innerproduct2->type = "InnerProduct";
            innerproduct2->name = convolution->name;
            innerproduct2->bottoms = convolution->bottoms;
            innerproduct2->tops = convolution->tops;

            ncnn::ParamDict pd;
            innerproduct2->load_param(pd);

            innerproduct2->num_output = convolution->num_output;
            innerproduct2->bias_term = convolution->bias_term;
            innerproduct2->weight_data_size = convolution->weight_data_size;
            innerproduct2->int8_scale_term = convolution->int8_scale_term;

            innerproduct2->weight_data = convolution->weight_data;
            innerproduct2->bias_data = convolution->bias_data;
#if NCNN_INT8
            innerproduct2->weight_data_int8_scales = convolution->weight_data_int8_scales;
            innerproduct2->bottom_blob_int8_scales = convolution->bottom_blob_int8_scales;
#endif

            innerproduct2->activation_type = convolution->activation_type;
            innerproduct2->activation_params = convolution->activation_params;

            layers[j] = innerproduct2;
            delete convolution;

            replaced = true;
        }

        if (!replaced)
            break;
    }

    return 0;
}

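// Command-line driver. A typical invocation (illustrative file names):
//   ncnnoptimize model.param model.bin model-opt.param model-opt.bin 0
// flag 65536 (or 1) sets storage_type = 1, which ModelWriter uses to store
// weights as fp16; any other value keeps fp32. Passing "null" as inbin loads
// zero weights via DataReaderFromEmpty and asks the writer to generate random
// ones. The optional cutstart/cutend layer names restrict the saved sub-graph.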
int main(int argc, char** argv)
{
    if (argc < 6)
    {
        fprintf(stderr, "usage: %s [inparam] [inbin] [outparam] [outbin] [flag] [cutstart] [cutend]\n", argv[0]);
        return -1;
    }

    const char* inparam = argv[1];
    const char* inbin = argv[2];
    const char* outparam = argv[3];
    const char* outbin = argv[4];
    int flag = atoi(argv[5]);
    const char* cutstartname = nullptr;
    const char* cutendname = nullptr;

    if (argc > 6)
    {
        cutstartname = argv[6];
    }

    if (argc > 7)
    {
        cutendname = argv[7];
    }

    NetOptimize optimizer;

    if (flag == 65536 || flag == 1)
    {
        optimizer.storage_type = 1;
    }
    else
    {
        optimizer.storage_type = 0;
    }

    optimizer.load_param(inparam);

    if (strcmp(inbin, "null") == 0)
    {
        DataReaderFromEmpty dr;
        optimizer.load_model(dr);
        optimizer.gen_random_weight = true;
    }
    else
        optimizer.load_model(inbin);

    if (optimizer.set_cutparam(cutstartname, cutendname) < 0)
    {
        return -1;
    }

    optimizer.fuse_batchnorm_scale();
    optimizer.fuse_convolution_batchnorm();
    optimizer.fuse_convolution_mul();
    optimizer.fuse_convolution_add();
    optimizer.fuse_convolutiondepthwise_batchnorm();
    optimizer.fuse_convolutiondepthwise_mul();
    optimizer.fuse_convolutiondepthwise_add();
    optimizer.fuse_deconvolution_batchnorm();
    optimizer.fuse_deconvolution_mul();
    optimizer.fuse_deconvolution_add();
    optimizer.fuse_deconvolutiondepthwise_batchnorm();
    optimizer.fuse_innerproduct_batchnorm();
    optimizer.fuse_innerproduct_add();
    optimizer.fuse_innerproduct_dropout();

    optimizer.replace_reduction_with_global_pooling();
    optimizer.replace_prelu_with_leaky_relu();

    optimizer.fuse_convolution_activation();
    optimizer.fuse_convolutiondepthwise_activation();
    optimizer.fuse_deconvolution_activation();
    optimizer.fuse_deconvolutiondepthwise_activation();
    optimizer.fuse_innerproduct_activation();
    optimizer.fuse_memorydata_binaryop();
    optimizer.fuse_binaryop_eltwise();

    optimizer.eliminate_dropout();
    optimizer.eliminate_pooling1x1();
    optimizer.eliminate_noop();
    optimizer.eliminate_split();
    optimizer.eliminate_flatten_after_global_pooling();
    optimizer.eliminate_reshape_after_global_pooling();
    optimizer.eliminate_reshape_before_binaryop();

    optimizer.replace_convolution_with_innerproduct_after_global_pooling();
    optimizer.replace_convolution_with_innerproduct_after_innerproduct();

    optimizer.eliminate_flatten_after_innerproduct();
    optimizer.eliminate_orphaned_memorydata();

    optimizer.shape_inference();

    optimizer.estimate_memory_footprint();

    optimizer.save(outparam, outbin);

    return 0;
}